blob: e3012a71667b0eeda7bf73ddd31860239a41fb3a [file] [log] [blame]
Yann Collet32fb4072017-08-18 16:52:05 -07001/*
W. Felix Handte5d693cc2022-12-20 12:49:47 -05002 * Copyright (c) Meta Platforms, Inc. and affiliates.
Yann Collet4ded9e52016-08-30 10:04:33 -07003 * All rights reserved.
4 *
Yann Collet32fb4072017-08-18 16:52:05 -07005 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
Yann Collet3128e032017-09-08 00:09:23 -07008 * You may select, at your option, one of the above-listed licenses.
Yann Collet4ded9e52016-08-30 10:04:33 -07009 */
Yann Collet4856a002015-01-24 01:58:16 +010010
Yann Collet8dafb1a2017-01-25 17:01:13 -080011
Yann Colleteeb8ba12015-10-22 16:55:40 +010012/* *************************************
Yann Collet4856a002015-01-24 01:58:16 +010013* Compiler Options
Yann Colleteeb8ba12015-10-22 16:55:40 +010014***************************************/
Yann Collet94ca85d2016-08-14 01:19:12 +020015#ifdef _MSC_VER /* Visual */
Przemyslaw Skibinskie6797412016-12-21 13:47:11 +010016# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
Yann Collet94ca85d2016-08-14 01:19:12 +020017# pragma warning(disable : 4204) /* non-constant aggregate initializer */
18#endif
Przemyslaw Skibinski2f6ccee2016-12-21 13:23:34 +010019#if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
20# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
21#endif
Yann Collet4856a002015-01-24 01:58:16 +010022
Yann Collet6f3acba2016-02-12 20:19:48 +010023/*-*************************************
Yann Collet4856a002015-01-24 01:58:16 +010024* Includes
Yann Colleteeb8ba12015-10-22 16:55:40 +010025***************************************/
Przemyslaw Skibinski7a8a03c2016-12-21 15:08:44 +010026#include "platform.h" /* Large Files support, SET_BINARY_MODE */
shakeelraodca73db2019-03-28 17:50:34 -070027#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
W. Felix Handteb87f97b2021-03-08 17:39:14 -050028#include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
Yann Collet9f432922015-11-09 17:42:17 +010029#include <stdlib.h> /* malloc, free */
30#include <string.h> /* strcmp, strlen */
Yann Collet8b130002023-01-06 15:25:36 -080031#include <time.h> /* clock_t, to measure process time */
W. Felix Handteb87f97b2021-03-08 17:39:14 -050032#include <fcntl.h> /* O_WRONLY */
Yann Collet3ca62612018-10-02 15:59:11 -070033#include <assert.h>
Yann Collet9f432922015-11-09 17:42:17 +010034#include <errno.h> /* errno */
Nick Magerko2d39b432019-08-19 16:49:25 -070035#include <limits.h> /* INT_MAX */
Casey McGintyd4337b62018-09-11 11:39:49 -070036#include <signal.h>
Yann Collet59a71162019-04-10 12:37:03 -070037#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
inikepd5ff2c32016-04-28 14:40:45 +020038
Sean Purcell279be202017-04-06 12:56:40 -070039#if defined (_MSC_VER)
40# include <sys/stat.h>
41# include <io.h>
42#endif
43
Yann Collet4856a002015-01-24 01:58:16 +010044#include "fileio.h"
Yonatan Komornik70df5de2022-01-24 14:43:02 -080045#include "fileio_asyncio.h"
46#include "fileio_common.h"
47
48FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
49UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
Yann Collet2bfc79a2018-02-01 16:13:04 -080050
Yann Colletd3b7f8d2016-06-04 19:47:02 +020051#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
W. Felix Handte7dcca6b2020-05-01 16:20:40 -040052#include "../lib/zstd.h"
Nick Terrell09149be2021-04-30 15:02:12 -070053#include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
Yann Collet2bfc79a2018-02-01 16:13:04 -080054
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +010055#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
Nick Terrell2cb8ee82017-02-06 11:32:13 -080056# include <zlib.h>
Yann Collet500014a2017-01-19 16:59:56 -080057# if !defined(z_const)
58# define z_const
59# endif
Przemyslaw Skibinskiabfb51f2016-11-30 15:05:54 +010060#endif
Yann Collet2bfc79a2018-02-01 16:13:04 -080061
Nick Terrellaa8bcf32017-03-13 18:11:07 -070062#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
63# include <lzma.h>
64#endif
Yann Collet4856a002015-01-24 01:58:16 +010065
Sean Purcell4de86322017-04-24 16:48:25 -070066#define LZ4_MAGICNUMBER 0x184D2204
67#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
W. Felix Handte36023872017-09-21 11:29:35 -070068# define LZ4F_ENABLE_OBSOLETE_ENUMS
Sean Purcell4de86322017-04-24 16:48:25 -070069# include <lz4frame.h>
Sean Purcell2c4b6fe2017-04-25 11:00:54 -070070# include <lz4.h>
Sean Purcell4de86322017-04-24 16:48:25 -070071#endif
72
/* FIO_zlibVersion() :
 * @return : the version string reported by zlib when gzip support is compiled in
 *           (ZSTD_GZCOMPRESS or ZSTD_GZDECOMPRESS defined),
 *           otherwise the constant string "Unsupported". */
char const* FIO_zlibVersion(void)
{
#if !defined(ZSTD_GZCOMPRESS) && !defined(ZSTD_GZDECOMPRESS)
    return "Unsupported";
#else
    return zlibVersion();
#endif
}
81
/* FIO_lz4Version() :
 * @return : the lz4 version string when lz4 support is compiled in
 *           (ZSTD_LZ4COMPRESS or ZSTD_LZ4DECOMPRESS defined),
 *           otherwise the constant string "Unsupported".
 *  With lz4 older than v1.7.3, LZ4_versionString() is not available,
 *  so the string is assembled at compile time from the LZ4_VERSION_* macros. */
char const* FIO_lz4Version(void)
{
#if !defined(ZSTD_LZ4COMPRESS) && !defined(ZSTD_LZ4DECOMPRESS)
    return "Unsupported";
#elif LZ4_VERSION_NUMBER >= 10703
    /* LZ4_versionString() added in v1.7.3 */
    return LZ4_versionString();
#else
#   define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
#   define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION)
    return ZSTD_LZ4_VERSION_STRING;
#endif
}
97
/* FIO_lzmaVersion() :
 * @return : the version string reported by liblzma when lzma/xz support is compiled in
 *           (ZSTD_LZMACOMPRESS or ZSTD_LZMADECOMPRESS defined),
 *           otherwise the constant string "Unsupported". */
char const* FIO_lzmaVersion(void)
{
#if !defined(ZSTD_LZMACOMPRESS) && !defined(ZSTD_LZMADECOMPRESS)
    return "Unsupported";
#else
    return lzma_version_string();
#endif
}
106
Yann Collet4856a002015-01-24 01:58:16 +0100107
Yann Collet6f3acba2016-02-12 20:19:48 +0100108/*-*************************************
Yann Collet4856a002015-01-24 01:58:16 +0100109* Constants
Yann Colleteeb8ba12015-10-22 16:55:40 +0100110***************************************/
Yann Collet0853f862018-08-13 13:10:42 -0700111#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */
Yann Collet0e300592017-04-11 14:41:02 -0700112#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
Yann Collet6f3acba2016-02-12 20:19:48 +0100113
inikep3c7c3522016-04-22 13:59:05 +0200114#define FNSPACE 30
115
W. Felix Handteb87f97b2021-03-08 17:39:14 -0500116/* Default file permissions 0666 (modulated by umask) */
W. Felix Handte1e3eba62023-01-17 15:08:15 -0800117/* Temporary restricted file permissions are used when we're going to
118 * chmod/chown at the end of the operation. */
W. Felix Handteda619182021-04-06 11:29:28 -0400119#if !defined(_WIN32)
120/* These macros aren't defined on windows. */
W. Felix Handteb87f97b2021-03-08 17:39:14 -0500121#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
W. Felix Handte1e3eba62023-01-17 15:08:15 -0800122#define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR)
W. Felix Handte45c49182021-03-09 01:24:11 -0500123#else
W. Felix Handteda619182021-04-06 11:29:28 -0400124#define DEFAULT_FILE_PERMISSIONS (0666)
W. Felix Handte1e3eba62023-01-17 15:08:15 -0800125#define TEMPORARY_FILE_PERMISSIONS (0600)
W. Felix Handte45c49182021-03-09 01:24:11 -0500126#endif
W. Felix Handteb87f97b2021-03-08 17:39:14 -0500127
Yann Collet00fc1ba2017-10-01 12:10:26 -0700128/*-************************************
129* Signal (Ctrl-C trapping)
130**************************************/
/* Name of the output file to delete if the process is interrupted;
 * NULL when there is nothing to clean up. */
static const char* g_artefact = NULL;

/* INThandler() :
 * SIGINT (Ctrl-C) handler : removes the partially written output file
 * registered in g_artefact (if any), then terminates the process with exit code 2.
 * NOTE(review): remove(), the display macro and exit() are not async-signal-safe
 * per POSIX; this is kept as-is since it only runs on the way out. */
static void INThandler(int sig)
{
    assert(sig==SIGINT);
    (void)sig;   /* only used by the assert and the non-MSVC branch */
#if !defined(_MSC_VER)
    /* ignore further SIGINT while cleaning up.
     * note : this invocation generates a buggy warning in Visual Studio */
    signal(sig, SIG_IGN);
#endif
    if (g_artefact != NULL) {
        assert(UTIL_isRegularFile(g_artefact));
        remove(g_artefact);
    }
    DISPLAY("\n");
    exit(2);
}
Nick Terrella6052af2017-11-17 16:38:56 -0800145static void addHandler(char const* dstFileName)
146{
147 if (UTIL_isRegularFile(dstFileName)) {
148 g_artefact = dstFileName;
149 signal(SIGINT, INThandler);
150 } else {
151 g_artefact = NULL;
152 }
153}
154/* Idempotent */
155static void clearHandler(void)
156{
157 if (g_artefact) signal(SIGINT, SIG_DFL);
158 g_artefact = NULL;
159}
Yann Collet00fc1ba2017-10-01 12:10:26 -0700160
161
Casey McGintyd4337b62018-09-11 11:39:49 -0700162/*-*********************************************************
163* Termination signal trapping (Print debug stack trace)
164***********************************************************/
Julian Fessard0ea286f2018-10-09 17:24:48 -0700165#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
166# if (__has_feature(address_sanitizer))
167# define BACKTRACE_ENABLE 0
168# endif /* __has_feature(address_sanitizer) */
169#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
170# define BACKTRACE_ENABLE 0
171#endif
172
Yann Colletb304b672018-10-09 17:56:59 -0700173#if !defined(BACKTRACE_ENABLE)
174/* automatic detector : backtrace enabled by default on linux+glibc and osx */
Rosen Penev23b5ee12019-05-06 15:02:47 -0700175# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
Yann Colletb304b672018-10-09 17:56:59 -0700176 || (defined(__APPLE__) && defined(__MACH__))
177# define BACKTRACE_ENABLE 1
178# else
179# define BACKTRACE_ENABLE 0
180# endif
Yann Collete0ab6b62018-10-09 17:12:21 -0700181#endif
182
Yann Colletb304b672018-10-09 17:56:59 -0700183/* note : after this point, BACKTRACE_ENABLE is necessarily defined */
Yann Collete0ab6b62018-10-09 17:12:21 -0700184
Yann Colletb304b672018-10-09 17:56:59 -0700185
186#if BACKTRACE_ENABLE
Yann Collet54001f32018-09-21 14:46:09 -0700187
Yann Collete0ab6b62018-10-09 17:12:21 -0700188#include <execinfo.h> /* backtrace, backtrace_symbols */
189
Casey McGintyd4337b62018-09-11 11:39:49 -0700190#define MAX_STACK_FRAMES 50
191
Casey McGintyd4337b62018-09-11 11:39:49 -0700192static void ABRThandler(int sig) {
Casey McGintyb9118ec2018-09-11 14:49:47 -0700193 const char* name;
194 void* addrlist[MAX_STACK_FRAMES];
195 char** symbollist;
Yann Collet07e04782019-01-25 14:42:44 -0800196 int addrlen, i;
Casey McGintyd4337b62018-09-11 11:39:49 -0700197
Casey McGintyb9118ec2018-09-11 14:49:47 -0700198 switch (sig) {
199 case SIGABRT: name = "SIGABRT"; break;
200 case SIGFPE: name = "SIGFPE"; break;
201 case SIGILL: name = "SIGILL"; break;
202 case SIGINT: name = "SIGINT"; break;
203 case SIGSEGV: name = "SIGSEGV"; break;
204 default: name = "UNKNOWN";
205 }
Casey McGintyd4337b62018-09-11 11:39:49 -0700206
Casey McGintyb9118ec2018-09-11 14:49:47 -0700207 DISPLAY("Caught %s signal, printing stack:\n", name);
208 /* Retrieve current stack addresses. */
209 addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
210 if (addrlen == 0) {
211 DISPLAY("\n");
212 return;
213 }
214 /* Create readable strings to each frame. */
215 symbollist = backtrace_symbols(addrlist, addrlen);
216 /* Print the stack trace, excluding calls handling the signal. */
217 for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
218 DISPLAY("%s\n", symbollist[i]);
219 }
220 free(symbollist);
221 /* Reset and raise the signal so default handler runs. */
222 signal(sig, SIG_DFL);
223 raise(sig);
Casey McGintyd4337b62018-09-11 11:39:49 -0700224}
225#endif
226
/* FIO_addAbortHandler() :
 * When backtrace support is enabled (BACKTRACE_ENABLE),
 * installs ABRThandler for SIGABRT, SIGFPE, SIGILL, SIGSEGV and SIGBUS.
 * Does nothing otherwise. */
void FIO_addAbortHandler(void)
{
#if BACKTRACE_ENABLE
    static const int trappedSignals[] = { SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS };
    size_t const nbTrapped = sizeof(trappedSignals) / sizeof(trappedSignals[0]);
    size_t n;
    for (n = 0; n < nbTrapped; n++) {
        signal(trappedSignals[n], ABRThandler);
    }
#endif
}
237
senhuang42d54566f2020-08-28 11:01:04 -0400238/*-*************************************
senhuang423a7d6252020-09-01 12:52:18 -0400239* Parameters: FIO_ctx_t
senhuang42d54566f2020-08-28 11:01:04 -0400240***************************************/
241
/* struct FIO_ctx_s :
 * per-run file i/o context, exposed as the opaque type FIO_ctx_t (typedef in fileio.h).
 * Created by FIO_createContext(), released by FIO_freeContext(). */
struct FIO_ctx_s {

    /* file i/o info */
    int nbFilesTotal;        /* total nb of files in this run; set via FIO_setNbFilesTotal(), 1 by default */
    int hasStdinInput;       /* 1 when stdin is one of the inputs, see FIO_determineHasStdinInput() */
    int hasStdoutOutput;     /* set via FIO_setHasStdoutOutput() */

    /* file i/o state */
    int currFileIdx;         /* presumably the index of the file being processed -- TODO confirm with users of this field */
    int nbFilesProcessed;    /* nb of files processed so far, starts at 0 */
    size_t totalBytesInput;  /* running total, starts at 0 */
    size_t totalBytesOutput; /* running total, starts at 0 */
};
256
Nick Terrellfbff7822022-01-07 15:07:28 -0800257static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx)
258{
259 return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3;
260}
261
262static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx)
263{
264 int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1);
265 assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0);
266 return shouldDisplay;
267}
268
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800269
270/*-*************************************
271* Parameters: Initialization
272***************************************/
273
274#define FIO_OVERLAP_LOG_NOTSET 9999
275#define FIO_LDM_PARAM_NOTSET 9999
276
277
278FIO_prefs_t* FIO_createPreferences(void)
279{
280 FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
281 if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
282
283 ret->compressionType = FIO_zstdCompression;
284 ret->overwrite = 0;
285 ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
286 ret->dictIDFlag = 1;
287 ret->checksumFlag = 1;
288 ret->removeSrcFile = 0;
289 ret->memLimit = 0;
290 ret->nbWorkers = 1;
291 ret->blockSize = 0;
292 ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
293 ret->adaptiveMode = 0;
294 ret->rsyncable = 0;
295 ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
296 ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
297 ret->ldmFlag = 0;
298 ret->ldmHashLog = 0;
299 ret->ldmMinMatch = 0;
300 ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
301 ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
Nick Magerkoaf0c9502019-08-15 23:57:55 -0700302 ret->streamSrcSize = 0;
Ephraim Park90077012019-06-24 13:40:52 -0700303 ret->targetCBlockSize = 0;
Nick Magerkodffbac52019-08-19 08:52:08 -0700304 ret->srcSizeHint = 0;
Yann Collet0ee36092019-10-17 16:09:53 -0700305 ret->testMode = 0;
senhuang42b5c35d72021-09-20 09:04:07 -0400306 ret->literalCompressionMode = ZSTD_ps_auto;
Shashank Tavildar0f2bff22019-10-28 18:21:47 -0700307 ret->excludeCompressedFiles = 0;
W. Felix Handte33f3e292021-05-04 16:24:46 -0400308 ret->allowBlockDevices = 0;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -0800309 ret->asyncIO = AIO_supported();
Nick Terrell03cc84f2022-08-04 17:15:59 -0700310 ret->passThrough = -1;
senhuang42d54566f2020-08-28 11:01:04 -0400311 return ret;
312}
313
314FIO_ctx_t* FIO_createContext(void)
315{
316 FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
317 if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
318
senhuang42b6abbc32020-08-26 11:35:07 -0400319 ret->currFileIdx = 0;
senhuang42432186c2020-09-24 15:55:30 -0400320 ret->hasStdinInput = 0;
senhuang421ebe3602020-10-07 13:42:34 -0400321 ret->hasStdoutOutput = 0;
senhuang42a6414f12020-09-01 12:32:18 -0400322 ret->nbFilesTotal = 1;
323 ret->nbFilesProcessed = 0;
senhuang42d54566f2020-08-28 11:01:04 -0400324 ret->totalBytesInput = 0;
325 ret->totalBytesOutput = 0;
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800326 return ret;
327}
328
329void FIO_freePreferences(FIO_prefs_t* const prefs)
330{
331 free(prefs);
332}
333
senhuang42d54566f2020-08-28 11:01:04 -0400334void FIO_freeContext(FIO_ctx_t* const fCtx)
335{
336 free(fCtx);
337}
338
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800339
340/*-*************************************
341* Parameters: Display Options
342***************************************/
343
Yann Collet07e04782019-01-25 14:42:44 -0800344void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800345
sen6030cdf2021-05-06 14:50:28 -0400346void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800347
348
349/*-*************************************
350* Parameters: Setters
351***************************************/
352
senhuang42d54566f2020-08-28 11:01:04 -0400353/* FIO_prefs_t functions */
354
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800355void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
356
357void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
358
Yann Collet3dfcafa2022-08-03 21:39:35 +0200359void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; }
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800360
Yann Collet07e04782019-01-25 14:42:44 -0800361void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800362
Yann Collet07e04782019-01-25 14:42:44 -0800363void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800364
Yann Colletcee6bec2023-01-20 17:53:05 -0800365void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); }
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800366
367void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
368
Yann Collet07e04782019-01-25 14:42:44 -0800369void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
Yann Collet500014a2017-01-19 16:59:56 -0800370#ifndef ZSTD_MULTITHREAD
Yann Collet209df522018-02-01 19:29:30 -0800371 if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
Yann Collet500014a2017-01-19 16:59:56 -0800372#endif
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800373 prefs->nbWorkers = nbWorkers;
Yann Collet500014a2017-01-19 16:59:56 -0800374}
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800375
Shashank Tavildar0f2bff22019-10-28 18:21:47 -0700376void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
377
W. Felix Handte33f3e292021-05-04 16:24:46 -0400378void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
379
Yann Collet07e04782019-01-25 14:42:44 -0800380void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800381 if (blockSize && prefs->nbWorkers==0)
Yann Collet512cbe82017-01-24 17:02:26 -0800382 DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800383 prefs->blockSize = blockSize;
Yann Collet512cbe82017-01-24 17:02:26 -0800384}
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800385
Yann Collet07e04782019-01-25 14:42:44 -0800386void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800387 if (overlapLog && prefs->nbWorkers==0)
Yann Collet6be23372017-01-30 11:17:26 -0800388 DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800389 prefs->overlapLog = overlapLog;
Yann Collet6be23372017-01-30 11:17:26 -0800390}
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800391
Yann Collet3dfcafa2022-08-03 21:39:35 +0200392void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800393 if ((adapt>0) && (prefs->nbWorkers==0))
Yann Collet89bc3092018-09-19 14:49:13 -0700394 EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800395 prefs->adaptiveMode = adapt;
Yann Collet89bc3092018-09-19 14:49:13 -0700396}
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800397
Nick Terrell46944232020-11-02 17:52:29 -0800398void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
399 prefs->useRowMatchFinder = useRowMatchFinder;
400}
401
Yann Collet07e04782019-01-25 14:42:44 -0800402void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800403 if ((rsyncable>0) && (prefs->nbWorkers==0))
Nick Terrellf9a671a2018-11-12 19:59:42 -0800404 EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800405 prefs->rsyncable = rsyncable;
Nick Terrellf9a671a2018-11-12 19:59:42 -0800406}
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800407
Nick Magerkoaf0c9502019-08-15 23:57:55 -0700408void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
409 prefs->streamSrcSize = streamSrcSize;
410}
411
Ephraim Park90077012019-06-24 13:40:52 -0700412void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
413 prefs->targetCBlockSize = targetCBlockSize;
414}
415
Nick Magerkodffbac52019-08-19 08:52:08 -0700416void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
Nick Magerko2d39b432019-08-19 16:49:25 -0700417 prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
Nick Magerkodffbac52019-08-19 08:52:08 -0700418}
419
Yann Collet0ee36092019-10-17 16:09:53 -0700420void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
421 prefs->testMode = (testMode!=0);
422}
423
Nick Terrell0c53c5a2019-02-15 14:15:36 -0800424void FIO_setLiteralCompressionMode(
425 FIO_prefs_t* const prefs,
senhuang42b5c35d72021-09-20 09:04:07 -0400426 ZSTD_paramSwitch_e mode) {
Nick Terrell0c53c5a2019-02-15 14:15:36 -0800427 prefs->literalCompressionMode = mode;
428}
429
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800430void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
Yann Collet6c51bf42018-09-24 18:16:08 -0700431{
432#ifndef ZSTD_NOCOMPRESS
433 assert(minCLevel >= ZSTD_minCLevel());
434#endif
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800435 prefs->minAdaptLevel = minCLevel;
Yann Collet6c51bf42018-09-24 18:16:08 -0700436}
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800437
438void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
Yann Collet6c51bf42018-09-24 18:16:08 -0700439{
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800440 prefs->maxAdaptLevel = maxCLevel;
Yann Collet6c51bf42018-09-24 18:16:08 -0700441}
442
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800443void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
444 prefs->ldmFlag = (ldmFlag>0);
Stella Laua1f04d52017-09-01 14:52:51 -0700445}
Stella Lau67d4a612017-09-02 21:10:36 -0700446
Yann Collet07e04782019-01-25 14:42:44 -0800447void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800448 prefs->ldmHashLog = ldmHashLog;
Stella Lau67d4a612017-09-02 21:10:36 -0700449}
450
Yann Collet07e04782019-01-25 14:42:44 -0800451void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800452 prefs->ldmMinMatch = ldmMinMatch;
Nick Terrellbdfcaec2018-12-13 17:17:32 -0800453}
Stella Laua1f04d52017-09-01 14:52:51 -0700454
Yann Collet07e04782019-01-25 14:42:44 -0800455void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800456 prefs->ldmBucketSizeLog = ldmBucketSizeLog;
457}
458
459
Yann Collet07e04782019-01-25 14:42:44 -0800460void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800461 prefs->ldmHashRateLog = ldmHashRateLog;
462}
Yann Collet4856a002015-01-24 01:58:16 +0100463
Bimba Shresthaf25a6e92020-01-10 14:25:24 -0800464void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
465{
466 prefs->patchFromMode = value != 0;
467}
Yann Collet4856a002015-01-24 01:58:16 +0100468
Bimba Shrestha6d8e7612020-03-09 14:12:52 -0500469void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
Bimba Shrestha167244a2020-03-09 13:07:29 -0500470{
Bimba Shrestha6d8e7612020-03-09 14:12:52 -0500471 prefs->contentSize = value != 0;
Bimba Shrestha167244a2020-03-09 13:07:29 -0500472}
473
Yann Collet3dfcafa2022-08-03 21:39:35 +0200474void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) {
Yonatan Komornik70df5de2022-01-24 14:43:02 -0800475#ifdef ZSTD_MULTITHREAD
Yonatan Komornik1598e6c2022-01-21 13:55:41 -0800476 prefs->asyncIO = value;
Yonatan Komornik70df5de2022-01-24 14:43:02 -0800477#else
478 (void) prefs;
479 (void) value;
480 DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n");
481#endif
Yonatan Komornik1598e6c2022-01-21 13:55:41 -0800482}
483
Nick Terrell03cc84f2022-08-04 17:15:59 -0700484void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) {
485 prefs->passThrough = (value != 0);
486}
487
Danielle Rozenblit96e55c12023-03-08 08:06:10 -0800488void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value)
Danielle Rozenblit2d8afd92023-02-14 09:42:23 -0800489{
490 prefs->mmapDict = value;
491}
492
senhuang42d54566f2020-08-28 11:01:04 -0400493/* FIO_ctx_t functions */
senhuang42da388912020-08-25 16:46:47 -0400494
senhuang421ebe3602020-10-07 13:42:34 -0400495void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
496 fCtx->hasStdoutOutput = value;
497}
498
senhuang423a7d6252020-09-01 12:52:18 -0400499void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
senhuang42b6abbc32020-08-26 11:35:07 -0400500{
senhuang42a6414f12020-09-01 12:32:18 -0400501 fCtx->nbFilesTotal = value;
senhuang42b6abbc32020-08-26 11:35:07 -0400502}
503
senhuang42432186c2020-09-24 15:55:30 -0400504void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) {
senhuang429f7212a2020-09-24 16:44:33 -0400505 size_t i = 0;
senhuang42432186c2020-09-24 15:55:30 -0400506 for ( ; i < filenames->tableSize; ++i) {
507 if (!strcmp(stdinmark, filenames->fileNames[i])) {
508 fCtx->hasStdinInput = 1;
509 return;
510 }
511 }
512}
513
Yann Collet459a6b72016-02-15 20:37:23 +0100514/*-*************************************
Yann Collet4856a002015-01-24 01:58:16 +0100515* Functions
Yann Colleteeb8ba12015-10-22 16:55:40 +0100516***************************************/
W. Felix Handteb02cdf62020-08-10 15:39:14 -0400517/** FIO_removeFile() :
Sean Purcell279be202017-04-06 12:56:40 -0700518 * @result : Unlink `fileName`, even if it's read-only */
W. Felix Handteb02cdf62020-08-10 15:39:14 -0400519static int FIO_removeFile(const char* path)
Sean Purcell279be202017-04-06 12:56:40 -0700520{
W. Felix Handtec1449142020-08-05 12:10:42 -0400521 stat_t statbuf;
522 if (!UTIL_stat(path, &statbuf)) {
523 DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
524 return 0;
525 }
526 if (!UTIL_isRegularFileStat(&statbuf)) {
527 DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
Nick Terrell82bc8fe2017-12-13 12:04:46 -0800528 return 0;
529 }
Dimitri Papadopoulos585aaa02023-09-23 19:03:18 +0200530#if defined(_WIN32)
Yann Collet01a1abf2017-05-05 19:15:24 -0700531 /* windows doesn't allow remove read-only files,
532 * so try to make it writable first */
W. Felix Handte953f0a02020-08-10 17:28:34 -0400533 if (!(statbuf.st_mode & _S_IWRITE)) {
W. Felix Handtec1449142020-08-05 12:10:42 -0400534 UTIL_chmod(path, &statbuf, _S_IWRITE);
535 }
Sean Purcell279be202017-04-06 12:56:40 -0700536#endif
537 return remove(path);
538}
539
Yann Colletcdff19c2016-11-11 17:26:54 -0800540/** FIO_openSrcFile() :
W. Felix Handte33f3e292021-05-04 16:24:46 -0400541 * condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
Yann Colleteac42532017-10-19 11:56:14 -0700542 * @result : FILE* to `srcFileName`, or NULL if it fails */
W. Felix Handte2ad68552023-01-17 14:01:06 -0800543static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
Yann Colletf0624362016-02-12 15:56:46 +0100544{
W. Felix Handte33f3e292021-05-04 16:24:46 -0400545 int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
Yann Colleteac42532017-10-19 11:56:14 -0700546 assert(srcFileName != NULL);
W. Felix Handte2ad68552023-01-17 14:01:06 -0800547 assert(statbuf != NULL);
Yann Colletf0624362016-02-12 15:56:46 +0100548 if (!strcmp (srcFileName, stdinmark)) {
Yann Collet105fa952018-12-20 09:16:40 -0800549 DISPLAYLEVEL(4,"Using stdin for input \n");
Yann Colletf0624362016-02-12 15:56:46 +0100550 SET_BINARY_MODE(stdin);
Yann Colleteac42532017-10-19 11:56:14 -0700551 return stdin;
Yann Colletf0624362016-02-12 15:56:46 +0100552 }
553
W. Felix Handte2ad68552023-01-17 14:01:06 -0800554 if (!UTIL_stat(srcFileName, statbuf)) {
Yann Collet105fa952018-12-20 09:16:40 -0800555 DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
556 srcFileName, strerror(errno));
Yann Collet173ef9d2018-12-19 18:30:57 -0800557 return NULL;
558 }
559
W. Felix Handte2ad68552023-01-17 14:01:06 -0800560 if (!UTIL_isRegularFileStat(statbuf)
561 && !UTIL_isFIFOStat(statbuf)
562 && !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
Bimba Shrestha0b52d872019-10-25 14:06:50 -0700563 ) {
Yann Colleteac42532017-10-19 11:56:14 -0700564 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
565 srcFileName);
566 return NULL;
567 }
568
569 { FILE* const f = fopen(srcFileName, "rb");
570 if (f == NULL)
571 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
572 return f;
573 }
Yann Colletf0624362016-02-12 15:56:46 +0100574}
575
Yann Collet43eeea42016-09-15 15:38:44 +0200576/** FIO_openDstFile() :
Yann Colleteac42532017-10-19 11:56:14 -0700577 * condition : `dstFileName` must be non-NULL.
Yann Collet43eeea42016-09-15 15:38:44 +0200578 * @result : FILE* to `dstFileName`, or NULL if it fails */
Yann Colletcaf40d02019-10-17 16:58:49 -0700579static FILE*
senhuang420e8ac6b2020-09-24 15:49:30 -0400580FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
W. Felix Handteb87f97b2021-03-08 17:39:14 -0500581 const char* srcFileName, const char* dstFileName,
582 const int mode)
Yann Colletf0624362016-02-12 15:56:46 +0100583{
Yann Collet14d0cd52023-03-31 13:09:52 -0700584 int isDstRegFile;
585
Yann Collet0a24d4e2019-10-17 16:39:47 -0700586 if (prefs->testMode) return NULL; /* do not open file in test mode */
587
Yann Colleteac42532017-10-19 11:56:14 -0700588 assert(dstFileName != NULL);
Yann Colletf0624362016-02-12 15:56:46 +0100589 if (!strcmp (dstFileName, stdoutmark)) {
Yann Collet105fa952018-12-20 09:16:40 -0800590 DISPLAYLEVEL(4,"Using stdout for output \n");
Yann Colletf0624362016-02-12 15:56:46 +0100591 SET_BINARY_MODE(stdout);
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800592 if (prefs->sparseFileSupport == 1) {
593 prefs->sparseFileSupport = 0;
Yann Collet75424d12016-05-23 16:56:56 +0200594 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
595 }
Yann Colleteac42532017-10-19 11:56:14 -0700596 return stdout;
Yann Colletf0624362016-02-12 15:56:46 +0100597 }
Yann Collet72dbf1b2018-12-20 12:27:12 -0800598
shakeelrao12909332019-03-23 21:53:13 -0700599 /* ensure dst is not the same as src */
shakeelraoe5811e52019-03-23 19:04:56 -0700600 if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
601 DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
602 return NULL;
Yann Collet6b7a1d62018-12-26 15:51:34 -0800603 }
Yann Colletb71adf42016-07-02 01:05:31 +0200604
Yann Collet14d0cd52023-03-31 13:09:52 -0700605 isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */
Karl Ostmo5e220bf2019-01-22 17:31:13 -0800606 if (prefs->sparseFileSupport == 1) {
Yann Collet2e297282023-04-03 09:45:11 -0700607 prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
Yann Collet14d0cd52023-03-31 13:09:52 -0700608 if (!isDstRegFile) {
Yann Collet5bf13592023-03-31 11:13:52 -0700609 prefs->sparseFileSupport = 0;
610 DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n");
611 }
Yann Colleteac42532017-10-19 11:56:14 -0700612 }
613
Yann Collet14d0cd52023-03-31 13:09:52 -0700614 if (isDstRegFile) {
Yann Colleteac42532017-10-19 11:56:14 -0700615 /* Check if destination file already exists */
Yann Collet0a24d4e2019-10-17 16:39:47 -0700616#if !defined(_WIN32)
617 /* this test does not work on Windows :
618 * `NUL` and `nul` are detected as regular files */
Yann Collet105fa952018-12-20 09:16:40 -0800619 if (!strcmp(dstFileName, nulmark)) {
620 EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
621 dstFileName);
622 }
Yann Collet0a24d4e2019-10-17 16:39:47 -0700623#endif
W. Felix Handte1fb10ba2021-03-08 17:49:20 -0500624 if (!prefs->overwrite) {
625 if (g_display_prefs.displayLevel <= 1) {
626 /* No interaction possible */
Yann Collet8c85b292023-01-23 18:55:51 -0800627 DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n",
W. Felix Handte1fb10ba2021-03-08 17:49:20 -0500628 dstFileName);
629 return NULL;
senhuang42aab11ce2020-08-25 11:25:49 -0400630 }
W. Felix Handte1fb10ba2021-03-08 17:49:20 -0500631 DISPLAY("zstd: %s already exists; ", dstFileName);
632 if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput))
633 return NULL;
634 }
635 /* need to unlink */
636 FIO_removeFile(dstFileName);
637 }
Yann Colleteac42532017-10-19 11:56:14 -0700638
W. Felix Handte45c49182021-03-09 01:24:11 -0500639 {
640#if defined(_WIN32)
641 /* Windows requires opening the file as a "binary" file to avoid
642 * mangling. This macro doesn't exist on unix. */
643 const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
W. Felix Handte4f9c6fd2021-05-05 13:13:56 -0400644 const int fd = _open(dstFileName, openflags, mode);
645 FILE* f = NULL;
646 if (fd != -1) {
647 f = _fdopen(fd, "wb");
648 }
W. Felix Handte45c49182021-03-09 01:24:11 -0500649#else
650 const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
W. Felix Handte45c49182021-03-09 01:24:11 -0500651 const int fd = open(dstFileName, openflags, mode);
W. Felix Handteb87f97b2021-03-08 17:39:14 -0500652 FILE* f = NULL;
653 if (fd != -1) {
654 f = fdopen(fd, "wb");
655 }
W. Felix Handte4f9c6fd2021-05-05 13:13:56 -0400656#endif
Mike Swansonaf80f6d2019-06-09 01:52:45 -0700657 if (f == NULL) {
Yann Colleteac42532017-10-19 11:56:14 -0700658 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
W. Felix Handtec4c3e112023-03-09 12:47:40 -0500659 } else {
660 /* An increased buffer size can provide a significant performance
661 * boost on some platforms. Note that providing a NULL buf with a
662 * size that's not 0 is not defined in ANSI C, but is defined in an
663 * extension. There are three possibilities here:
664 * 1. Libc supports the extended version and everything is good.
665 * 2. Libc ignores the size when buf is NULL, in which case
666 * everything will continue as if we didn't call `setvbuf()`.
667 * 3. We fail the call and execution continues but a warning
668 * message might be shown.
669 * In all cases due execution continues. For now, I believe that
670 * this is a more cost-effective solution than managing the buffers
671 * allocations ourselves (will require an API change).
672 */
673 if (setvbuf(f, NULL, _IOFBF, 1 MB)) {
674 DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName);
675 }
Mike Swansonaf80f6d2019-06-09 01:52:45 -0700676 }
Yann Colleteac42532017-10-19 11:56:14 -0700677 return f;
678 }
Yann Colletf0624362016-02-12 15:56:46 +0100679}
680
Danielle Rozenblit8a189b12023-02-13 15:23:06 -0800681
682/* FIO_getDictFileStat() :
683 */
684static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) {
685 assert(dictFileStat != NULL);
686 if (fileName == NULL) return;
687
688 if (!UTIL_stat(fileName, dictFileStat)) {
689 EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
690 }
691
692 if (!UTIL_isRegularFileStat(dictFileStat)) {
693 EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
694 }
695}
696
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400697/* FIO_setDictBufferMalloc() :
698 * allocates a buffer, pointed by `dict->dictBuffer`,
Yann Collet0e300592017-04-11 14:41:02 -0700699 * loads `filename` content into it, up to DICTSIZE_MAX bytes.
Yann Colleteac42532017-10-19 11:56:14 -0700700 * @return : loaded size
Yann Collet0e300592017-04-11 14:41:02 -0700701 * if fileName==NULL, returns 0 and a NULL pointer
702 */
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400703static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
Yann Colletdeb078b2015-12-17 20:30:14 +0100704{
705 FILE* fileHandle;
Yann Colletdeb078b2015-12-17 20:30:14 +0100706 U64 fileSize;
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400707 void** bufferPtr = &dict->dictBuffer;
Yann Colletdeb078b2015-12-17 20:30:14 +0100708
Yann Colleteac42532017-10-19 11:56:14 -0700709 assert(bufferPtr != NULL);
W. Felix Handte03820762023-01-17 14:50:31 -0800710 assert(dictFileStat != NULL);
Yann Colletdeb078b2015-12-17 20:30:14 +0100711 *bufferPtr = NULL;
Yann Collet2ce49232016-02-02 14:36:49 +0100712 if (fileName == NULL) return 0;
Yann Colletdeb078b2015-12-17 20:30:14 +0100713
714 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
Yann Colleted2fb6b2018-12-20 17:20:07 -0800715
W. Felix Handte9985e102021-12-06 13:47:18 -0500716 fileHandle = fopen(fileName, "rb");
717
718 if (fileHandle == NULL) {
719 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
720 }
721
W. Felix Handte03820762023-01-17 14:50:31 -0800722 fileSize = UTIL_getFileSizeStat(dictFileStat);
Bimba Shresthaf25a6e92020-01-10 14:25:24 -0800723 {
724 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
725 if (fileSize > dictSizeMax) {
W. Felix Handte9985e102021-12-06 13:47:18 -0500726 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
Bimba Shresthaf25a6e92020-01-10 14:25:24 -0800727 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
728 }
Yann Collet56f1f0e2017-09-26 11:21:36 -0700729 }
Yann Colletb71adf42016-07-02 01:05:31 +0200730 *bufferPtr = malloc((size_t)fileSize);
Yann Collet6d4fef32017-05-17 18:36:15 -0700731 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
Yann Collet56f1f0e2017-09-26 11:21:36 -0700732 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
W. Felix Handte9985e102021-12-06 13:47:18 -0500733 if (readSize != fileSize) {
Yann Collet0f2d4432018-12-19 17:25:58 -0800734 EXM_THROW(35, "Error reading dictionary file %s : %s",
735 fileName, strerror(errno));
W. Felix Handte9985e102021-12-06 13:47:18 -0500736 }
Yann Collet56f1f0e2017-09-26 11:21:36 -0700737 }
Yann Colletdeb078b2015-12-17 20:30:14 +0100738 fclose(fileHandle);
739 return (size_t)fileSize;
740}
741
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -0800742#if (PLATFORM_POSIX_VERSION > 0)
743#include <sys/mman.h>
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400744static void FIO_munmap(FIO_Dict_t* dict)
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -0800745{
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400746 munmap(dict->dictBuffer, dict->dictBufferSize);
747 dict->dictBuffer = NULL;
748 dict->dictBufferSize = 0;
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -0800749}
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400750static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
Danielle Rozenblit610c8b92023-02-09 07:37:37 -0800751{
752 int fileHandle;
753 U64 fileSize;
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400754 void** bufferPtr = &dict->dictBuffer;
Danielle Rozenblit610c8b92023-02-09 07:37:37 -0800755
756 assert(bufferPtr != NULL);
757 assert(dictFileStat != NULL);
758 *bufferPtr = NULL;
759 if (fileName == NULL) return 0;
760
761 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
762
Danielle Rozenblit610c8b92023-02-09 07:37:37 -0800763 fileHandle = open(fileName, O_RDONLY);
764
765 if (fileHandle == -1) {
766 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
767 }
768
769 fileSize = UTIL_getFileSizeStat(dictFileStat);
Danielle Rozenblit610c8b92023-02-09 07:37:37 -0800770 {
771 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
772 if (fileSize > dictSizeMax) {
773 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
774 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
775 }
776 }
777
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400778 *bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0);
779 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
Danielle Rozenblit610c8b92023-02-09 07:37:37 -0800780
781 close(fileHandle);
782 return (size_t)fileSize;
783}
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400784#elif defined(_MSC_VER) || defined(_WIN32)
785#include <windows.h>
786static void FIO_munmap(FIO_Dict_t* dict)
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -0800787{
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400788 UnmapViewOfFile(dict->dictBuffer);
789 CloseHandle(dict->dictHandle);
790 dict->dictBuffer = NULL;
791 dict->dictBufferSize = 0;
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -0800792}
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400793static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
794{
795 HANDLE fileHandle, mapping;
796 U64 fileSize;
797 void** bufferPtr = &dict->dictBuffer;
798
799 assert(bufferPtr != NULL);
800 assert(dictFileStat != NULL);
801 *bufferPtr = NULL;
802 if (fileName == NULL) return 0;
803
804 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
805
806 fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
807
808 if (fileHandle == INVALID_HANDLE_VALUE) {
809 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
810 }
811
812 fileSize = UTIL_getFileSizeStat(dictFileStat);
813 {
814 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
815 if (fileSize > dictSizeMax) {
816 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
817 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
818 }
819 }
820
821 mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
822 if (mapping == NULL) {
823 EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno));
824 }
825
826 *bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */
827 if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno));
828
829 dict->dictHandle = fileHandle;
830 return (size_t)fileSize;
831}
832#else
833static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
834{
835 return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
836}
837static void FIO_munmap(FIO_Dict_t* dict) {
838 free(dict->dictBuffer);
839 dict->dictBuffer = NULL;
840 dict->dictBufferSize = 0;
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -0800841}
842#endif
Danielle Rozenblit610c8b92023-02-09 07:37:37 -0800843
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400844static void FIO_freeDict(FIO_Dict_t* dict) {
Danielle Rozenblit96e55c12023-03-08 08:06:10 -0800845 if (dict->dictBufferType == FIO_mallocDict) {
846 free(dict->dictBuffer);
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400847 dict->dictBuffer = NULL;
848 dict->dictBufferSize = 0;
Danielle Rozenblit96e55c12023-03-08 08:06:10 -0800849 } else if (dict->dictBufferType == FIO_mmapDict) {
daniellerozenblitb2ad17a2023-03-28 19:44:53 -0400850 FIO_munmap(dict);
851 } else {
852 assert(0); /* Should not reach this case */
853 }
854}
855
856static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) {
857 dict->dictBufferType = dictBufferType;
858 if (dict->dictBufferType == FIO_mallocDict) {
859 dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
860 } else if (dict->dictBufferType == FIO_mmapDict) {
861 dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat);
Danielle Rozenblit70850eb2023-03-08 16:54:57 -0800862 } else {
863 assert(0); /* Should not reach this case */
Danielle Rozenblit96e55c12023-03-08 08:06:10 -0800864 }
865}
Sen Huang64bc4412019-10-03 13:53:04 -0400866
867
868/* FIO_checkFilenameCollisions() :
869 * Checks for and warns if there are any files that would have the same output path
870 */
871int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
Xin Xie9a8ccd42020-06-19 19:35:51 -0700872 const char **filenameTableSorted, *prevElem, *filename;
Sen Huang64bc4412019-10-03 13:53:04 -0400873 unsigned u;
874
Sen Huang64bc4412019-10-03 13:53:04 -0400875 filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
876 if (!filenameTableSorted) {
Yann Collet8c85b292023-01-23 18:55:51 -0800877 DISPLAYLEVEL(1, "Allocation error during filename collision checking \n");
Sen Huang64bc4412019-10-03 13:53:04 -0400878 return 1;
879 }
Yann Collet17951332019-10-17 15:32:03 -0700880
Sen Huang64bc4412019-10-03 13:53:04 -0400881 for (u = 0; u < nbFiles; ++u) {
Xin Xie9a8ccd42020-06-19 19:35:51 -0700882 filename = strrchr(filenameTable[u], PATH_SEP);
Sen Huang64bc4412019-10-03 13:53:04 -0400883 if (filename == NULL) {
884 filenameTableSorted[u] = filenameTable[u];
885 } else {
886 filenameTableSorted[u] = filename+1;
887 }
888 }
889
Sen Huang6e406b52019-10-08 09:54:59 -0400890 qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
Sen Huang64bc4412019-10-03 13:53:04 -0400891 prevElem = filenameTableSorted[0];
892 for (u = 1; u < nbFiles; ++u) {
893 if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
Yann Collet8c85b292023-01-23 18:55:51 -0800894 DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem);
Sen Huang64bc4412019-10-03 13:53:04 -0400895 }
896 prevElem = filenameTableSorted[u];
897 }
898
Sen Huang6e406b52019-10-08 09:54:59 -0400899 free((void*)filenameTableSorted);
Sen Huang64bc4412019-10-03 13:53:04 -0400900 return 0;
901}
902
/* extractFilename() :
 * @return : pointer to what follows the last `separator` within `path`,
 *           or `path` itself when it contains no `separator`.
 *           The result points inside `path` (no allocation). */
static const char*
extractFilename(const char* path, char separator)
{
    const char* const lastSep = strrchr(path, separator);
    return (lastSep != NULL) ? lastSep + 1 : path;
}
910
/* FIO_createFilename_fromOutDir() :
 * Takes a source file name and specified output directory, and
 * allocates memory for and returns a pointer to final path.
 * Only the last component of `path` is kept; it is appended to `outDirName`,
 * with a separator inserted unless `outDirName` already ends with one.
 * The buffer is zero-initialized and has room for `suffixLen` extra characters
 * after the name, so the result is null-terminated and a suffix can be appended by the caller.
 * This function never returns an error (it may abort() in case of pb)
 */
static char*
FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
{
    const char* filenameStart;
    char separator;
    char* result;
    size_t dirLen;
    size_t nameLen;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    separator = '\\';
#else
    separator = '/';
#endif

    filenameStart = extractFilename(path, separator);
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    filenameStart = extractFilename(filenameStart, '/');  /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
#endif

    dirLen = strlen(outDirName);
    nameLen = strlen(filenameStart);

    /* dir + separator + name + suffix + null terminator */
    result = (char*) calloc(1, dirLen + 1 + nameLen + suffixLen + 1);
    if (!result) {
        EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
    }

    memcpy(result, outDirName, dirLen);
    /* `dirLen > 0` : do not look at outDirName[-1] when the directory name is empty */
    if (dirLen > 0 && outDirName[dirLen-1] == separator) {
        memcpy(result + dirLen, filenameStart, nameLen);
    } else {
        result[dirLen] = separator;
        memcpy(result + dirLen + 1, filenameStart, nameLen);
    }

    return result;
}
949
/* FIO_highbit64() :
 * @return : 0-based position of the highest bit set in `v`
 *           (0 for v==1, 63 when the top bit of a 64-bit value is set).
 * note : only works for v > 0 !
 */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned pos;
    assert(v != 0);
    /* each shift which leaves something behind moves the answer one bit up */
    for (pos = 0; (v >>= 1) != 0; pos++) { }
    return pos;
}
962
Bimba Shrestha5b0a4522020-04-17 15:58:53 -0500963static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
Bimba Shrestha659ff852020-04-21 21:12:50 -0500964 unsigned long long const dictSize,
Bimba Shrestha5b0a4522020-04-17 15:58:53 -0500965 unsigned long long const maxSrcFileSize)
966{
967 unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize));
Bimba Shresthaf8479092020-05-26 09:23:26 -0700968 unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
Bimba Shresthab0671082020-06-18 09:31:06 -0700969 if (maxSize == UTIL_FILESIZE_UNKNOWN)
970 EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
Bimba Shrestha5b0a4522020-04-17 15:58:53 -0500971 assert(maxSize != UTIL_FILESIZE_UNKNOWN);
Bimba Shresthaf8479092020-05-26 09:23:26 -0700972 if (maxSize > maxWindowSize)
973 EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
Bimba Shrestha5b0a4522020-04-17 15:58:53 -0500974 FIO_setMemLimit(prefs, (unsigned)maxSize);
975}
Bimba Shresthaf25a6e92020-01-10 14:25:24 -0800976
Yann Collet8c85b292023-01-23 18:55:51 -0800977/* FIO_multiFilesConcatWarning() :
978 * This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts.
senhuang427991c552020-08-26 16:50:20 -0400979 * Returns 1 if the console should abort, 0 if console should proceed.
Yann Collet0d793a62021-01-06 01:35:52 -0800980 *
Yann Collet8c85b292023-01-23 18:55:51 -0800981 * If output is stdout or test mode is active, check that `--rm` disabled.
982 *
983 * If there is just 1 file to process, zstd will proceed as usual.
984 * If each file get processed into its own separate destination file, proceed as usual.
985 *
986 * When multiple files are processed into a single output,
987 * display a warning message, then disable --rm if it's set.
988 *
989 * If -f is specified or if output is stdout, just proceed.
990 * If output is set with -o, prompt for confirmation.
senhuang427991c552020-08-26 16:50:20 -0400991 */
Yann Collet8c85b292023-01-23 18:55:51 -0800992static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff)
senhuang427991c552020-08-26 16:50:20 -0400993{
Yann Collet02434e02023-01-25 16:18:20 -0800994 if (fCtx->hasStdoutOutput) {
995 if (prefs->removeSrcFile)
996 /* this should not happen ; hard fail, to protect user's data
997 * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
998 EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. "
999 "This scenario is not supposed to be possible. "
1000 "This is a programming error. File an issue for it to be fixed.");
1001 }
Yann Collet8c85b292023-01-23 18:55:51 -08001002 if (prefs->testMode) {
Yann Collet02434e02023-01-25 16:18:20 -08001003 if (prefs->removeSrcFile)
1004 /* this should not happen ; hard fail, to protect user's data
1005 * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
1006 EXM_THROW(43, "Test mode shall not remove input files! "
1007 "This scenario is not supposed to be possible. "
1008 "This is a programming error. File an issue for it to be fixed.");
Yann Collet8c85b292023-01-23 18:55:51 -08001009 return 0;
senhuang427991c552020-08-26 16:50:20 -04001010 }
Yann Collet8c85b292023-01-23 18:55:51 -08001011
1012 if (fCtx->nbFilesTotal == 1) return 0;
1013 assert(fCtx->nbFilesTotal > 1);
1014
1015 if (!outFileName) return 0;
1016
1017 if (fCtx->hasStdoutOutput) {
1018 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n");
1019 } else {
1020 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName);
1021 }
1022 DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n")
1023
1024 /* multi-input into single output : --rm is not allowed */
1025 if (prefs->removeSrcFile) {
1026 DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. \n");
1027 prefs->removeSrcFile = 0;
1028 }
1029
1030 if (fCtx->hasStdoutOutput) return 0;
1031 if (prefs->overwrite) return 0;
1032
1033 /* multiple files concatenated into single destination file using -o without -f */
1034 if (g_display_prefs.displayLevel <= displayLevelCutoff) {
1035 /* quiet mode => no prompt => fail automatically */
1036 DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n");
1037 DISPLAYLEVEL(1, "Aborting. \n");
1038 return 1;
1039 }
1040 /* normal mode => prompt */
1041 return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
senhuang427991c552020-08-26 16:50:20 -04001042}
1043
Yann Colletea684c32023-01-18 15:38:36 -08001044static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos)
1045{
1046 ZSTD_inBuffer i;
1047 i.src = buf;
1048 i.size = s;
1049 i.pos = pos;
1050 return i;
1051}
1052
1053static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos)
1054{
1055 ZSTD_outBuffer o;
1056 o.dst = buf;
1057 o.size = s;
1058 o.pos = pos;
1059 return o;
1060}
1061
inikep3c7c3522016-04-22 13:59:05 +02001062#ifndef ZSTD_NOCOMPRESS
Yann Collet4f137032015-12-17 02:23:58 +01001063
Yann Collet01082a32018-03-22 17:49:46 -07001064/* **********************************************************************
1065 * Compression
1066 ************************************************************************/
/* cRess_t :
 * Resources used for compression, created by FIO_createCResources()
 * and released by FIO_freeCResources(). */
typedef struct {
    FIO_Dict_t dict;            /* dictionary buffer : set by FIO_initDict(), released by FIO_freeDict() */
    const char* dictFileName;   /* dictionary path, as given to FIO_createCResources() */
    stat_t dictFileStat;        /* filled by FIO_getDictFileStat() */
    ZSTD_CStream* cctx;         /* compression context, from ZSTD_createCCtx() */
    WritePoolCtx_t *writeCtx;   /* from AIO_WritePool_create() */
    ReadPoolCtx_t *readCtx;     /* from AIO_ReadPool_create() */
} cRess_t;
1075
Yann Collet0d793a62021-01-06 01:35:52 -08001076/** ZSTD_cycleLog() :
1077 * condition for correct operation : hashLog > 1 */
1078static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
1079{
1080 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
1081 assert(hashLog > 1);
1082 return hashLog - btScale;
1083}
1084
Bimba Shrestha659ff852020-04-21 21:12:50 -05001085static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
Bimba Shrestha5b0a4522020-04-17 15:58:53 -05001086 ZSTD_compressionParameters* comprParams,
Bimba Shrestha659ff852020-04-21 21:12:50 -05001087 unsigned long long const dictSize,
Bimba Shrestha5b0a4522020-04-17 15:58:53 -05001088 unsigned long long const maxSrcFileSize,
1089 int cLevel)
1090{
1091 unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
1092 ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
1093 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
1094 if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
1095 DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
Olivier Perretd4548c92021-05-12 22:11:15 +02001096 comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
senhuang42a39614d2020-10-13 13:00:27 -04001097 if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
Bimba Shrestha5b0a4522020-04-17 15:58:53 -05001098 if (!prefs->ldmFlag)
sergeyandreenko1f87c882024-02-06 12:07:11 -08001099 DISPLAYLEVEL(2, "long mode automatically triggered\n");
Bimba Shrestha5b0a4522020-04-17 15:58:53 -05001100 FIO_setLdmFlag(prefs, 1);
1101 }
1102 if (cParams.strategy >= ZSTD_btopt) {
sergeyandreenko1f87c882024-02-06 12:07:11 -08001103 DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
1104 DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
1105 DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
1106 DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
1107 DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
Bimba Shrestha5b0a4522020-04-17 15:58:53 -05001108 }
1109}
1110
/* FIO_createCResources() :
 * Builds the set of resources used for compression :
 * a compression context, the dictionary buffer (malloc'ed or memory-mapped),
 * and the write / read pools.
 * All compression parameters from `prefs`, `cLevel` and `comprParams`
 * are then applied onto the context, and the dictionary is attached by reference.
 * `dictFileName` may be NULL (no dictionary).
 * In patch-from mode, `prefs` and the local copy of `comprParams` are adjusted first.
 * @return : the initialized cRess_t (by value); release it with FIO_freeCResources().
 */
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
                                    const char* dictFileName, unsigned long long const maxSrcFileSize,
                                    int cLevel, ZSTD_compressionParameters comprParams) {
    /* mmap is requested explicitly with ZSTD_ps_enable, and forbidden with ZSTD_ps_disable */
    int useMMap = prefs->mmapDict == ZSTD_ps_enable;
    int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
    FIO_dictBufferType_t dictBufferType;
    cRess_t ress;
    memset(&ress, 0, sizeof(ress));

    DISPLAYLEVEL(6, "FIO_createCResources \n");
    ress.cctx = ZSTD_createCCtx();
    if (ress.cctx == NULL)
        EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
                    strerror(errno));

    FIO_getDictFileStat(dictFileName, &ress.dictFileStat);

    /* need to update memLimit before calling createDictBuffer
     * because of memLimit check inside it */
    if (prefs->patchFromMode) {
        U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
        unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
        /* also select mmap when the dictionary is larger than the memory limit,
         * evaluated before the limit gets adjusted just below */
        useMMap |= dictSize > prefs->memLimit;
        /* a stream size provided through prefs takes precedence over maxSrcFileSize */
        FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
    }

    /* an explicit ZSTD_ps_disable wins over any reason to use mmap */
    dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
    FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType);   /* works with dictFileName==NULL */

    ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
    ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());

    /* Advanced parameters, including dictionary */
    if (dictFileName && (ress.dict.dictBuffer==NULL))
        EXM_THROW(32, "allocation error : can't create dictBuffer");
    ress.dictFileName = dictFileName;

    /* adaptive mode : use a default window log, unless one was set or long mode is enabled */
    if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
        comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;

    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) );  /* always enable content size when available (note: supposed to be default) */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
    /* compression level */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
    /* max compressed block size */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
    /* source size hint */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
    /* long distance matching */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
    /* these two ldm parameters are only forwarded when they were set */
    if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
    }
    if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
    }
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
    /* compression parameters */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
    /* multi-threading */
#ifdef ZSTD_MULTITHREAD
    DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
    if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
        DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
    }
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
    /* dictionary */
    /* patch-from mode references the buffer as a prefix; otherwise it is loaded as a dictionary, by reference */
    if (prefs->patchFromMode) {
        CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
    } else {
        CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
    }

    return ress;
}
1201
daniellerozenblitb2ad17a2023-03-28 19:44:53 -04001202static void FIO_freeCResources(cRess_t* const ress)
Yann Collet4f137032015-12-17 02:23:58 +01001203{
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08001204 FIO_freeDict(&(ress->dict));
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001205 AIO_WritePool_free(ress->writeCtx);
1206 AIO_ReadPool_free(ress->readCtx);
senhuang42043b9342020-10-14 20:19:46 -04001207 ZSTD_freeCStream(ress->cctx); /* never fails */
Yann Collet4f137032015-12-17 02:23:58 +01001208}
1209
1210
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001211#ifdef ZSTD_GZCOMPRESS
Yann Collet89bc3092018-09-19 14:49:13 -07001212static unsigned long long
Yann Collet632e0772019-10-21 12:14:59 -07001213FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
Yann Collete12ae022017-05-16 17:32:33 -07001214 const char* srcFileName, U64 const srcFileSize,
1215 int compressionLevel, U64* readsize)
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001216{
1217 unsigned long long inFileSize = 0, outFileSize = 0;
1218 z_stream strm;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001219 IOJob_t *writeJob = NULL;
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001220
Yann Collete12ae022017-05-16 17:32:33 -07001221 if (compressionLevel > Z_BEST_COMPRESSION)
1222 compressionLevel = Z_BEST_COMPRESSION;
Przemyslaw Skibinski64f72212017-02-13 21:00:41 +01001223
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001224 strm.zalloc = Z_NULL;
1225 strm.zfree = Z_NULL;
1226 strm.opaque = Z_NULL;
Przemyslaw Skibinskicb563062017-02-08 17:37:14 +01001227
Yann Collet632e0772019-10-21 12:14:59 -07001228 { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
Yann Collete12ae022017-05-16 17:32:33 -07001229 15 /* maxWindowLogSize */ + 16 /* gzip only */,
Danielle Rozenblit4dffc352022-12-14 06:58:35 -08001230 8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */
Yann Collet632e0772019-10-21 12:14:59 -07001231 if (ret != Z_OK) {
1232 EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
1233 } }
Przemyslaw Skibinskicb563062017-02-08 17:37:14 +01001234
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001235 writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001236 strm.next_in = 0;
Przemyslaw Skibinski862698f2017-02-27 13:21:05 +01001237 strm.avail_in = 0;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001238 strm.next_out = (Bytef*)writeJob->buffer;
1239 strm.avail_out = (uInt)writeJob->bufferSize;
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001240
1241 while (1) {
Yann Collet632e0772019-10-21 12:14:59 -07001242 int ret;
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001243 if (strm.avail_in == 0) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001244 AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
1245 if (ress->readCtx->srcBufferLoaded == 0) break;
1246 inFileSize += ress->readCtx->srcBufferLoaded;
1247 strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
1248 strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001249 }
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001250
1251 {
1252 size_t const availBefore = strm.avail_in;
1253 ret = deflate(&strm, Z_NO_FLUSH);
1254 AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
1255 }
1256
Yann Collete12ae022017-05-16 17:32:33 -07001257 if (ret != Z_OK)
1258 EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001259 { size_t const cSize = writeJob->bufferSize - strm.avail_out;
Yann Collet632e0772019-10-21 12:14:59 -07001260 if (cSize) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001261 writeJob->usedBufferSize = cSize;
1262 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Yann Collet632e0772019-10-21 12:14:59 -07001263 outFileSize += cSize;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001264 strm.next_out = (Bytef*)writeJob->buffer;
1265 strm.avail_out = (uInt)writeJob->bufferSize;
1266 } }
Yann Collet632e0772019-10-21 12:14:59 -07001267 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
Nick Terrellfbff7822022-01-07 15:07:28 -08001268 DISPLAYUPDATE_PROGRESS(
1269 "\rRead : %u MB ==> %.2f%% ",
1270 (unsigned)(inFileSize>>20),
Nick Terrell40a71882022-12-21 16:09:25 -08001271 (double)outFileSize/(double)inFileSize*100)
Yann Collet632e0772019-10-21 12:14:59 -07001272 } else {
Nick Terrellfbff7822022-01-07 15:07:28 -08001273 DISPLAYUPDATE_PROGRESS(
1274 "\rRead : %u / %u MB ==> %.2f%% ",
1275 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
Nick Terrell40a71882022-12-21 16:09:25 -08001276 (double)outFileSize/(double)inFileSize*100);
Nick Terrellfbff7822022-01-07 15:07:28 -08001277 } }
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001278
1279 while (1) {
Yann Collet632e0772019-10-21 12:14:59 -07001280 int const ret = deflate(&strm, Z_FINISH);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001281 { size_t const cSize = writeJob->bufferSize - strm.avail_out;
Yann Collet632e0772019-10-21 12:14:59 -07001282 if (cSize) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001283 writeJob->usedBufferSize = cSize;
1284 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Yann Collet632e0772019-10-21 12:14:59 -07001285 outFileSize += cSize;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001286 strm.next_out = (Bytef*)writeJob->buffer;
1287 strm.avail_out = (uInt)writeJob->bufferSize;
1288 } }
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001289 if (ret == Z_STREAM_END) break;
Yann Collete12ae022017-05-16 17:32:33 -07001290 if (ret != Z_BUF_ERROR)
1291 EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001292 }
1293
Yann Collet632e0772019-10-21 12:14:59 -07001294 { int const ret = deflateEnd(&strm);
1295 if (ret != Z_OK) {
1296 EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
1297 } }
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001298 *readsize = inFileSize;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001299 AIO_WritePool_releaseIoJob(writeJob);
1300 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001301 return outFileSize;
1302}
1303#endif
1304
1305
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001306#ifdef ZSTD_LZMACOMPRESS
Yann Collet89bc3092018-09-19 14:49:13 -07001307static unsigned long long
1308FIO_compressLzmaFrame(cRess_t* ress,
1309 const char* srcFileName, U64 const srcFileSize,
1310 int compressionLevel, U64* readsize, int plain_lzma)
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001311{
1312 unsigned long long inFileSize = 0, outFileSize = 0;
1313 lzma_stream strm = LZMA_STREAM_INIT;
1314 lzma_action action = LZMA_RUN;
1315 lzma_ret ret;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001316 IOJob_t *writeJob = NULL;
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001317
1318 if (compressionLevel < 0) compressionLevel = 0;
1319 if (compressionLevel > 9) compressionLevel = 9;
1320
1321 if (plain_lzma) {
1322 lzma_options_lzma opt_lzma;
Yann Collete12ae022017-05-16 17:32:33 -07001323 if (lzma_lzma_preset(&opt_lzma, compressionLevel))
Yann Colletcaf40d02019-10-17 16:58:49 -07001324 EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001325 ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
Yann Collete12ae022017-05-16 17:32:33 -07001326 if (ret != LZMA_OK)
Yann Colletcaf40d02019-10-17 16:58:49 -07001327 EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001328 } else {
1329 ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
Yann Collete12ae022017-05-16 17:32:33 -07001330 if (ret != LZMA_OK)
Yann Colletcaf40d02019-10-17 16:58:49 -07001331 EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001332 }
1333
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001334 writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
Alex Xu886de7b2023-02-14 00:30:56 +00001335 strm.next_out = (BYTE*)writeJob->buffer;
1336 strm.avail_out = writeJob->bufferSize;
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001337 strm.next_in = 0;
1338 strm.avail_in = 0;
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001339
1340 while (1) {
1341 if (strm.avail_in == 0) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001342 size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
1343 if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001344 inFileSize += inSize;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001345 strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
1346 strm.avail_in = ress->readCtx->srcBufferLoaded;
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001347 }
1348
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001349 {
1350 size_t const availBefore = strm.avail_in;
1351 ret = lzma_code(&strm, action);
1352 AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
1353 }
1354
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001355
Yann Collete12ae022017-05-16 17:32:33 -07001356 if (ret != LZMA_OK && ret != LZMA_STREAM_END)
Yann Colletcaf40d02019-10-17 16:58:49 -07001357 EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001358 { size_t const compBytes = writeJob->bufferSize - strm.avail_out;
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001359 if (compBytes) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001360 writeJob->usedBufferSize = compBytes;
1361 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001362 outFileSize += compBytes;
Alex Xu886de7b2023-02-14 00:30:56 +00001363 strm.next_out = (BYTE*)writeJob->buffer;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001364 strm.avail_out = writeJob->bufferSize;
Yann Collete12ae022017-05-16 17:32:33 -07001365 } }
Yann Collet300e1df2017-10-18 11:41:52 -07001366 if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
Nick Terrellfbff7822022-01-07 15:07:28 -08001367 DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
Yann Colletededcfc2018-12-21 16:19:44 -08001368 (unsigned)(inFileSize>>20),
Nick Terrell40a71882022-12-21 16:09:25 -08001369 (double)outFileSize/(double)inFileSize*100)
Yann Collete12ae022017-05-16 17:32:33 -07001370 else
Nick Terrellfbff7822022-01-07 15:07:28 -08001371 DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
Yann Colletededcfc2018-12-21 16:19:44 -08001372 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
Nick Terrell40a71882022-12-21 16:09:25 -08001373 (double)outFileSize/(double)inFileSize*100);
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001374 if (ret == LZMA_STREAM_END) break;
1375 }
1376
1377 lzma_end(&strm);
1378 *readsize = inFileSize;
1379
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001380 AIO_WritePool_releaseIoJob(writeJob);
1381 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
1382
Nick Terrellaa8bcf32017-03-13 18:11:07 -07001383 return outFileSize;
1384}
1385#endif
1386
Sean Purcell4de86322017-04-24 16:48:25 -07001387#ifdef ZSTD_LZ4COMPRESS
Yann Collet07e04782019-01-25 14:42:44 -08001388
W. Felix Handtebaff9dd2017-10-17 01:19:29 -04001389#if LZ4_VERSION_NUMBER <= 10600
1390#define LZ4F_blockLinked blockLinked
1391#define LZ4F_max64KB max64KB
1392#endif
Yann Collet07e04782019-01-25 14:42:44 -08001393
/* FIO_LZ4_GetBlockSize_FromBlockId() :
 * Converts a block size ID into a byte count, computed as 2^(8 + 2*id).
 * Each ID step therefore multiplies the size by 4 (id 4 => 64 KB). */
static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
{
    int const sizeLog = 8 + (id * 2);
    return 1 << sizeLog;
}
Yann Collet07e04782019-01-25 14:42:44 -08001395
Yann Collet89bc3092018-09-19 14:49:13 -07001396static unsigned long long
1397FIO_compressLz4Frame(cRess_t* ress,
1398 const char* srcFileName, U64 const srcFileSize,
Yann Collet07e04782019-01-25 14:42:44 -08001399 int compressionLevel, int checksumFlag,
1400 U64* readsize)
Sean Purcell4de86322017-04-24 16:48:25 -07001401{
W. Felix Handtebaff9dd2017-10-17 01:19:29 -04001402 const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
Sean Purcell4de86322017-04-24 16:48:25 -07001403 unsigned long long inFileSize = 0, outFileSize = 0;
1404
1405 LZ4F_preferences_t prefs;
Sean Purcell2c4b6fe2017-04-25 11:00:54 -07001406 LZ4F_compressionContext_t ctx;
Sean Purcell4de86322017-04-24 16:48:25 -07001407
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001408 IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
1409
Sean Purcell4de86322017-04-24 16:48:25 -07001410 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
Yann Collete12ae022017-05-16 17:32:33 -07001411 if (LZ4F_isError(errorCode))
1412 EXM_THROW(31, "zstd: failed to create lz4 compression context");
Sean Purcell4de86322017-04-24 16:48:25 -07001413
1414 memset(&prefs, 0, sizeof(prefs));
1415
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001416 assert(blockSize <= ress->readCtx->base.jobBufferSize);
Sean Purcelleab41c12017-04-26 10:17:38 -07001417
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001418 /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */
1419 prefs.autoFlush = 0;
Sean Purcell4de86322017-04-24 16:48:25 -07001420 prefs.compressionLevel = compressionLevel;
W. Felix Handtebaff9dd2017-10-17 01:19:29 -04001421 prefs.frameInfo.blockMode = LZ4F_blockLinked;
1422 prefs.frameInfo.blockSizeID = LZ4F_max64KB;
Yann Collet07e04782019-01-25 14:42:44 -08001423 prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
Sean Purcell2c4b6fe2017-04-25 11:00:54 -07001424#if LZ4_VERSION_NUMBER >= 10600
Yann Collet18b79532017-10-17 16:14:25 -07001425 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
Sean Purcell2c4b6fe2017-04-25 11:00:54 -07001426#endif
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001427 assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize);
Sean Purcell4de86322017-04-24 16:48:25 -07001428
W. Felix Handtebaff9dd2017-10-17 01:19:29 -04001429 {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001430 size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs);
Yann Collete12ae022017-05-16 17:32:33 -07001431 if (LZ4F_isError(headerSize))
1432 EXM_THROW(33, "File header generation failed : %s",
1433 LZ4F_getErrorName(headerSize));
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001434 writeJob->usedBufferSize = headerSize;
1435 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Sean Purcell4de86322017-04-24 16:48:25 -07001436 outFileSize += headerSize;
1437
1438 /* Read first block */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001439 inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
Sean Purcell4de86322017-04-24 16:48:25 -07001440
1441 /* Main Loop */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001442 while (ress->readCtx->srcBufferLoaded) {
1443 size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
1444 size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize,
1445 ress->readCtx->srcBuffer, inSize, NULL);
Yann Collete12ae022017-05-16 17:32:33 -07001446 if (LZ4F_isError(outSize))
1447 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
1448 srcFileName, LZ4F_getErrorName(outSize));
Sean Purcell4de86322017-04-24 16:48:25 -07001449 outFileSize += outSize;
Yann Collet173ef9d2018-12-19 18:30:57 -08001450 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
Nick Terrellfbff7822022-01-07 15:07:28 -08001451 DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
Yann Colletededcfc2018-12-21 16:19:44 -08001452 (unsigned)(inFileSize>>20),
Nick Terrell40a71882022-12-21 16:09:25 -08001453 (double)outFileSize/(double)inFileSize*100)
Yann Collet173ef9d2018-12-19 18:30:57 -08001454 } else {
Nick Terrellfbff7822022-01-07 15:07:28 -08001455 DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
Yann Colletededcfc2018-12-21 16:19:44 -08001456 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
Nick Terrell40a71882022-12-21 16:09:25 -08001457 (double)outFileSize/(double)inFileSize*100);
Yann Collet173ef9d2018-12-19 18:30:57 -08001458 }
Sean Purcell4de86322017-04-24 16:48:25 -07001459
1460 /* Write Block */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001461 writeJob->usedBufferSize = outSize;
1462 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Sean Purcell4de86322017-04-24 16:48:25 -07001463
1464 /* Read next block */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001465 AIO_ReadPool_consumeBytes(ress->readCtx, inSize);
1466 inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
Sean Purcell4de86322017-04-24 16:48:25 -07001467 }
Sean Purcell4de86322017-04-24 16:48:25 -07001468
1469 /* End of Stream mark */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001470 headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL);
Yann Collete12ae022017-05-16 17:32:33 -07001471 if (LZ4F_isError(headerSize))
1472 EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
1473 srcFileName, LZ4F_getErrorName(headerSize));
Sean Purcell4de86322017-04-24 16:48:25 -07001474
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001475 writeJob->usedBufferSize = headerSize;
1476 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Sean Purcell4de86322017-04-24 16:48:25 -07001477 outFileSize += headerSize;
1478 }
1479
1480 *readsize = inFileSize;
1481 LZ4F_freeCompressionContext(ctx);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001482 AIO_WritePool_releaseIoJob(writeJob);
1483 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
Sean Purcell4de86322017-04-24 16:48:25 -07001484
1485 return outFileSize;
1486}
1487#endif
1488
Yann Collet90eca312018-02-02 14:24:56 -08001489static unsigned long long
senhuang4251234962020-09-07 13:13:05 -04001490FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
1491 FIO_prefs_t* const prefs,
Karl Ostmo5e220bf2019-01-22 17:31:13 -08001492 const cRess_t* ressPtr,
Yann Collet90eca312018-02-02 14:24:56 -08001493 const char* srcFileName, U64 fileSize,
1494 int compressionLevel, U64* readsize)
Yann Collet4f137032015-12-17 02:23:58 +01001495{
Yann Collet90eca312018-02-02 14:24:56 -08001496 cRess_t const ress = *ressPtr;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001497 IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx);
1498
Yann Collet4f137032015-12-17 02:23:58 +01001499 U64 compressedfilesize = 0;
Nick Terrellf48d34e2017-12-14 13:00:20 -08001500 ZSTD_EndDirective directive = ZSTD_e_continue;
Kevin Svetlitskib3888192021-11-01 13:31:03 -07001501 U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
Yann Collet9d26cb62018-08-09 17:44:30 -07001502
Yann Collete7a49c62018-08-11 20:48:06 -07001503 /* stats */
Yann Colletca02ebe2018-09-19 15:09:45 -07001504 ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
1505 ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
Yann Collet9d26cb62018-08-09 17:44:30 -07001506 typedef enum { noChange, slower, faster } speedChange_e;
1507 speedChange_e speedChange = noChange;
Yann Collet105677c2018-08-17 18:11:54 -07001508 unsigned flushWaiting = 0;
Yann Collete7a49c62018-08-11 20:48:06 -07001509 unsigned inputPresented = 0;
Yann Collet2dd76032018-08-09 15:51:30 -07001510 unsigned inputBlocked = 0;
1511 unsigned lastJobID = 0;
Nick Terrell15f32ad2022-12-14 15:17:05 -08001512 UTIL_time_t lastAdaptTime = UTIL_getTime();
1513 U64 const adaptEveryMicro = REFRESH_RATE;
1514
W. Felix Handte87e94e32021-06-10 12:31:42 -04001515 UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);
Yann Collet2dd76032018-08-09 15:51:30 -07001516
Yann Collet90eca312018-02-02 14:24:56 -08001517 DISPLAYLEVEL(6, "compression using zstd format \n");
Przemyslaw Skibinski02018c82017-02-08 16:54:23 +01001518
Yann Collet4f137032015-12-17 02:23:58 +01001519 /* init */
Nick Terrell3841dba2018-06-14 16:24:18 -07001520 if (fileSize != UTIL_FILESIZE_UNKNOWN) {
Kevin Svetlitskib3888192021-11-01 13:31:03 -07001521 pledgedSrcSize = fileSize;
Nick Terrell3841dba2018-06-14 16:24:18 -07001522 CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
Nick Magerkoc403b122019-08-19 09:01:31 -07001523 } else if (prefs->streamSrcSize > 0) {
Nick Magerko30bfa222019-08-19 11:20:28 -07001524 /* unknown source size; use the declared stream size */
Kevin Svetlitskib3888192021-11-01 13:31:03 -07001525 pledgedSrcSize = prefs->streamSrcSize;
Nick Magerkoc403b122019-08-19 09:01:31 -07001526 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
Nick Terrell3841dba2018-06-14 16:24:18 -07001527 }
W. Felix Handte87e94e32021-06-10 12:31:42 -04001528
Kevin Svetlitskib3888192021-11-01 13:31:03 -07001529 {
1530 int windowLog;
1531 UTIL_HumanReadableSize_t windowSize;
1532 CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
1533 if (windowLog == 0) {
Jonathan McDowell470eb832022-02-01 03:20:30 -08001534 if (prefs->ldmFlag) {
1535 /* If long mode is set without a window size libzstd will set this size internally */
1536 windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
1537 } else {
1538 const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
Yann Collet3dfcafa2022-08-03 21:39:35 +02001539 windowLog = (int)cParams.windowLog;
Jonathan McDowell470eb832022-02-01 03:20:30 -08001540 }
Kevin Svetlitskib3888192021-11-01 13:31:03 -07001541 }
1542 windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
1543 DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
1544 }
Yann Colletca02ebe2018-09-19 15:09:45 -07001545 (void)srcFileName;
Yann Collet4f137032015-12-17 02:23:58 +01001546
1547 /* Main compression loop */
Nick Terrellf48d34e2017-12-14 13:00:20 -08001548 do {
Yann Collet2dd76032018-08-09 15:51:30 -07001549 size_t stillToFlush;
Yann Collet4f137032015-12-17 02:23:58 +01001550 /* Fill input Buffer */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001551 size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize());
Yann Colletea684c32023-01-18 15:38:36 -08001552 ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 );
Yann Colletededcfc2018-12-21 16:19:44 -08001553 DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
Yann Collet90eca312018-02-02 14:24:56 -08001554 *readsize += inSize;
Yann Collet4f137032015-12-17 02:23:58 +01001555
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001556 if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize))
Nick Terrellf48d34e2017-12-14 13:00:20 -08001557 directive = ZSTD_e_end;
1558
Yann Collet2dd76032018-08-09 15:51:30 -07001559 stillToFlush = 1;
Yann Collet79a35ac2018-08-09 15:16:31 -07001560 while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
Yann Collet2dd76032018-08-09 15:51:30 -07001561 || (directive == ZSTD_e_end && stillToFlush != 0) ) {
Yann Collet9d26cb62018-08-09 17:44:30 -07001562
Yann Collet2dd76032018-08-09 15:51:30 -07001563 size_t const oldIPos = inBuff.pos;
Yann Colletea684c32023-01-18 15:38:36 -08001564 ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
Yann Collet105677c2018-08-17 18:11:54 -07001565 size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
Yann Colletd8e215c2018-11-30 11:16:26 -08001566 CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001567 AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos);
Yann Collet2dd76032018-08-09 15:51:30 -07001568
1569 /* count stats */
Yann Collete7a49c62018-08-11 20:48:06 -07001570 inputPresented++;
Yann Collet89bc3092018-09-19 14:49:13 -07001571 if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
Yann Collet105677c2018-08-17 18:11:54 -07001572 if (!toFlushNow) flushWaiting = 1;
Yann Collet4f137032015-12-17 02:23:58 +01001573
Yann Collet6d4fef32017-05-17 18:36:15 -07001574 /* Write compressed stream */
Yann Collet2dd76032018-08-09 15:51:30 -07001575 DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001576 (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
Yann Collet6d4fef32017-05-17 18:36:15 -07001577 if (outBuff.pos) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001578 writeJob->usedBufferSize = outBuff.pos;
1579 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Yann Collet6d4fef32017-05-17 18:36:15 -07001580 compressedfilesize += outBuff.pos;
Nick Terrellf48d34e2017-12-14 13:00:20 -08001581 }
Yann Collet9d26cb62018-08-09 17:44:30 -07001582
Nick Terrell15f32ad2022-12-14 15:17:05 -08001583 /* adaptive mode : statistics measurement and speed correction */
1584 if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) {
1585 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
1586
1587 lastAdaptTime = UTIL_getTime();
1588
1589 /* check output speed */
1590 if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
1591
1592 unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
1593 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
1594 assert(zfp.produced >= previous_zfp_update.produced);
1595 assert(prefs->nbWorkers >= 1);
1596
1597 /* test if compression is blocked
1598 * either because output is slow and all buffers are full
1599 * or because input is slow and no job can start while waiting for at least one buffer to be filled.
1600 * note : exclude starting part, since currentJobID > 1 */
1601 if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
1602 && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
1603 ) {
1604 DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
1605 speedChange = slower;
1606 }
1607
1608 previous_zfp_update = zfp;
1609
1610 if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
1611 && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
1612 ) {
1613 DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
1614 speedChange = slower;
1615 }
1616 flushWaiting = 0;
1617 }
1618
1619 /* course correct only if there is at least one new job completed */
1620 if (zfp.currentJobID > lastJobID) {
1621 DISPLAYLEVEL(6, "compression level adaptation check \n")
1622
1623 /* check input speed */
1624 if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
1625 if (inputBlocked <= 0) {
1626 DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
1627 speedChange = slower;
1628 } else if (speedChange == noChange) {
1629 unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
1630 unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
1631 unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
1632 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
1633 previous_zfp_correction = zfp;
1634 assert(inputPresented > 0);
1635 DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
1636 inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
1637 (unsigned)newlyIngested, (unsigned)newlyConsumed,
1638 (unsigned)newlyFlushed, (unsigned)newlyProduced);
1639 if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
1640 && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
1641 && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
1642 ) {
1643 DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
1644 newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
1645 speedChange = faster;
1646 }
1647 }
1648 inputBlocked = 0;
1649 inputPresented = 0;
1650 }
1651
1652 if (speedChange == slower) {
1653 DISPLAYLEVEL(6, "slower speed , higher compression \n")
1654 compressionLevel ++;
1655 if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
1656 if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
1657 compressionLevel += (compressionLevel == 0); /* skip 0 */
1658 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1659 }
1660 if (speedChange == faster) {
1661 DISPLAYLEVEL(6, "faster speed , lighter compression \n")
1662 compressionLevel --;
1663 if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
1664 compressionLevel -= (compressionLevel == 0); /* skip 0 */
1665 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1666 }
1667 speedChange = noChange;
1668
1669 lastJobID = zfp.currentJobID;
1670 } /* if (zfp.currentJobID > lastJobID) */
1671 } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */
1672
1673 /* display notification */
1674 if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) {
Yann Collet70f81d62018-01-19 10:01:40 -08001675 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
Yann Collet0d793a62021-01-06 01:35:52 -08001676 double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
W. Felix Handte87e94e32021-06-10 12:31:42 -04001677 UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
1678 UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
1679 UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);
Yann Collet2dd76032018-08-09 15:51:30 -07001680
Nick Terrell15f32ad2022-12-14 15:17:05 -08001681 DELAY_NEXT_UPDATE();
1682
Yann Colletca02ebe2018-09-19 15:09:45 -07001683 /* display progress notifications */
Nick Terrellfbff7822022-01-07 15:07:28 -08001684 DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */
Karl Ostmo5e220bf2019-01-22 17:31:13 -08001685 if (g_display_prefs.displayLevel >= 3) {
Nick Terrellfbff7822022-01-07 15:07:28 -08001686 /* Verbose progress update */
1687 DISPLAY_PROGRESS(
1688 "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ",
1689 compressionLevel,
1690 buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
1691 consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
1692 produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
1693 cShare );
1694 } else {
sen6030cdf2021-05-06 14:50:28 -04001695 /* Require level 2 or forcibly displayed progress counter for summarized updates */
senhuang42a6414f12020-09-01 12:32:18 -04001696 if (fCtx->nbFilesTotal > 1) {
senhuang42cad6bf92020-09-15 13:01:46 -04001697 size_t srcFileNameSize = strlen(srcFileName);
1698 /* Ensure that the string we print is roughly the same size each time */
senhuang42ab0d3322020-09-15 15:53:32 -04001699 if (srcFileNameSize > 18) {
1700 const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
Nick Terrellfbff7822022-01-07 15:07:28 -08001701 DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ",
sen6030cdf2021-05-06 14:50:28 -04001702 fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
senhuang42cad6bf92020-09-15 13:01:46 -04001703 } else {
Nick Terrellfbff7822022-01-07 15:07:28 -08001704 DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ",
sen6030cdf2021-05-06 14:50:28 -04001705 fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
senhuang42cad6bf92020-09-15 13:01:46 -04001706 }
senhuang42b6abbc32020-08-26 11:35:07 -04001707 }
Nick Terrellfbff7822022-01-07 15:07:28 -08001708 DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
Yann Colletca02ebe2018-09-19 15:09:45 -07001709 if (fileSize != UTIL_FILESIZE_UNKNOWN)
Nick Terrellfbff7822022-01-07 15:07:28 -08001710 DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
1711 DISPLAY_PROGRESS(" ==> %2.f%%", cShare);
Yann Collet2dd76032018-08-09 15:51:30 -07001712 }
Nick Terrell15f32ad2022-12-14 15:17:05 -08001713 } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */
Yann Colletca02ebe2018-09-19 15:09:45 -07001714 } /* while ((inBuff.pos != inBuff.size) */
Nick Terrellf48d34e2017-12-14 13:00:20 -08001715 } while (directive != ZSTD_e_end);
Yann Collet4f137032015-12-17 02:23:58 +01001716
Nick Terrell4e706d72018-07-17 14:57:27 -07001717 if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
1718 EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
1719 (unsigned long long)*readsize, (unsigned long long)fileSize);
1720 }
1721
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001722 AIO_WritePool_releaseIoJob(writeJob);
1723 AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx);
1724
Yann Collet90eca312018-02-02 14:24:56 -08001725 return compressedfilesize;
1726}
1727
1728/*! FIO_compressFilename_internal() :
1729 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
1730 * @return : 0 : compression completed correctly,
1731 * 1 : missing or pb opening srcFileName
1732 */
1733static int
senhuang4251234962020-09-07 13:13:05 -04001734FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
1735 FIO_prefs_t* const prefs,
Karl Ostmo5e220bf2019-01-22 17:31:13 -08001736 cRess_t ress,
Yann Collet90eca312018-02-02 14:24:56 -08001737 const char* dstFileName, const char* srcFileName,
1738 int compressionLevel)
1739{
Ephraim Parka38601f2019-06-04 09:25:16 -07001740 UTIL_time_t const timeStart = UTIL_getTime();
Ephraim Park5fe97422019-06-04 09:04:35 -07001741 clock_t const cpuStart = clock();
Yann Collet90eca312018-02-02 14:24:56 -08001742 U64 readsize = 0;
1743 U64 compressedfilesize = 0;
Bimba Shrestha9388dac2020-03-09 15:40:18 -05001744 U64 const fileSize = UTIL_getFileSize(srcFileName);
Yann Collet9fb4a422021-03-20 17:29:41 -07001745 DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
Yann Collet90eca312018-02-02 14:24:56 -08001746
1747 /* compression format selection */
Karl Ostmo5e220bf2019-01-22 17:31:13 -08001748 switch (prefs->compressionType) {
Yann Collet90eca312018-02-02 14:24:56 -08001749 default:
1750 case FIO_zstdCompression:
senhuang4251234962020-09-07 13:13:05 -04001751 compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
Yann Collet90eca312018-02-02 14:24:56 -08001752 break;
1753
1754 case FIO_gzipCompression:
1755#ifdef ZSTD_GZCOMPRESS
1756 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
1757#else
1758 (void)compressionLevel;
1759 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
1760 srcFileName);
1761#endif
1762 break;
1763
1764 case FIO_xzCompression:
1765 case FIO_lzmaCompression:
1766#ifdef ZSTD_LZMACOMPRESS
Karl Ostmo5e220bf2019-01-22 17:31:13 -08001767 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
Yann Collet90eca312018-02-02 14:24:56 -08001768#else
1769 (void)compressionLevel;
1770 EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
1771 srcFileName);
1772#endif
1773 break;
1774
1775 case FIO_lz4Compression:
1776#ifdef ZSTD_LZ4COMPRESS
Yann Collet07e04782019-01-25 14:42:44 -08001777 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
Yann Collet90eca312018-02-02 14:24:56 -08001778#else
1779 (void)compressionLevel;
1780 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
1781 srcFileName);
1782#endif
1783 break;
1784 }
1785
Yann Collet4f137032015-12-17 02:23:58 +01001786 /* Status */
senhuang4228a9dc72020-09-03 20:23:30 -04001787 fCtx->totalBytesInput += (size_t)readsize;
1788 fCtx->totalBytesOutput += (size_t)compressedfilesize;
Nick Terrellfbff7822022-01-07 15:07:28 -08001789 DISPLAY_PROGRESS("\r%79s\r", "");
1790 if (FIO_shouldDisplayFileSummary(fCtx)) {
W. Felix Handtebc46b6e2021-06-09 16:04:10 -04001791 UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize);
1792 UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize);
senhuang421ebe3602020-10-07 13:42:34 -04001793 if (readsize == 0) {
Nick Terrellfbff7822022-01-07 15:07:28 -08001794 DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n",
senhuang421ebe3602020-10-07 13:42:34 -04001795 srcFileName,
W. Felix Handtebc46b6e2021-06-09 16:04:10 -04001796 hr_isize.precision, hr_isize.value, hr_isize.suffix,
1797 hr_osize.precision, hr_osize.value, hr_osize.suffix,
senhuang421ebe3602020-10-07 13:42:34 -04001798 dstFileName);
1799 } else {
Nick Terrellfbff7822022-01-07 15:07:28 -08001800 DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n",
senhuang421ebe3602020-10-07 13:42:34 -04001801 srcFileName,
Yann Collet0d793a62021-01-06 01:35:52 -08001802 (double)compressedfilesize / (double)readsize * 100,
W. Felix Handtebbb81c82021-06-09 13:05:44 -04001803 hr_isize.precision, hr_isize.value, hr_isize.suffix,
1804 hr_osize.precision, hr_osize.value, hr_osize.suffix,
senhuang421ebe3602020-10-07 13:42:34 -04001805 dstFileName);
senhuang42da388912020-08-25 16:46:47 -04001806 }
Bimba Shresthad0412f32020-04-03 12:10:02 -07001807 }
Yann Collet4f137032015-12-17 02:23:58 +01001808
Ephraim Park5fe97422019-06-04 09:04:35 -07001809 /* Elapsed Time and CPU Load */
1810 { clock_t const cpuEnd = clock();
1811 double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
1812 U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
1813 double const timeLength_s = (double)timeLength_ns / 1000000000;
1814 double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
Ephraim Parke498bb62019-06-04 09:42:18 -07001815 DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
Ephraim Park5fe97422019-06-04 09:04:35 -07001816 srcFileName, timeLength_s, cpuLoad_pct);
1817 }
Yann Collet4f137032015-12-17 02:23:58 +01001818 return 0;
1819}
1820
1821
Yann Collet9012b6c2018-10-01 17:16:34 -07001822/*! FIO_compressFilename_dstFile() :
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001823 * open dstFileName, or pass-through if ress.file != NULL,
Yann Collet9012b6c2018-10-01 17:16:34 -07001824 * then start compression with FIO_compressFilename_internal().
1825 * Manages source removal (--rm) and file permissions transfer.
1826 * note : ress.srcFile must be != NULL,
1827 * so reach this function through FIO_compressFilename_srcFile().
1828 * @return : 0 : compression completed correctly,
1829 * 1 : pb
1830 */
senhuang4251234962020-09-07 13:13:05 -04001831static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
1832 FIO_prefs_t* const prefs,
Karl Ostmo5e220bf2019-01-22 17:31:13 -08001833 cRess_t ress,
Yann Collet9012b6c2018-10-01 17:16:34 -07001834 const char* dstFileName,
1835 const char* srcFileName,
W. Felix Handte5653f962023-01-17 14:05:15 -08001836 const stat_t* srcFileStat,
Yann Collet9012b6c2018-10-01 17:16:34 -07001837 int compressionLevel)
1838{
1839 int closeDstFile = 0;
1840 int result;
W. Felix Handte1e3eba62023-01-17 15:08:15 -08001841 int transferStat = 0;
W. Felix Handtef746c372023-02-06 08:05:47 -08001842 int dstFd = -1;
W. Felix Handte5653f962023-01-17 14:05:15 -08001843
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001844 assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
1845 if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
W. Felix Handte1e3eba62023-01-17 15:08:15 -08001846 int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS;
W. Felix Handteb87f97b2021-03-08 17:39:14 -05001847 if ( strcmp (srcFileName, stdinmark)
Mike Gilbert57a86d92022-01-13 16:47:18 -05001848 && strcmp (dstFileName, stdoutmark)
W. Felix Handtea5ed28f2023-01-17 14:08:22 -08001849 && UTIL_isRegularFileStat(srcFileStat) ) {
W. Felix Handte1e3eba62023-01-17 15:08:15 -08001850 transferStat = 1;
1851 dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS;
W. Felix Handteb87f97b2021-03-08 17:39:14 -05001852 }
1853
Yann Collet9012b6c2018-10-01 17:16:34 -07001854 closeDstFile = 1;
Yann Collet7aaac3f2019-11-25 10:35:36 -08001855 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
Christoph Grüningerb921f1a2024-02-11 11:22:05 +01001856 { FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
1857 if (dstFile==NULL) return 1; /* could not open dstFileName */
1858 dstFd = fileno(dstFile);
1859 AIO_WritePool_setFile(ress.writeCtx, dstFile);
1860 }
Yann Collet9012b6c2018-10-01 17:16:34 -07001861 /* Must only be added after FIO_openDstFile() succeeds.
1862 * Otherwise we may delete the destination file if it already exists,
1863 * and the user presses Ctrl-C when asked if they wish to overwrite.
1864 */
1865 addHandler(dstFileName);
Yann Collet9012b6c2018-10-01 17:16:34 -07001866 }
1867
senhuang4251234962020-09-07 13:13:05 -04001868 result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
Yann Collet9012b6c2018-10-01 17:16:34 -07001869
1870 if (closeDstFile) {
Yann Collet9012b6c2018-10-01 17:16:34 -07001871 clearHandler();
1872
W. Felix Handtef746c372023-02-06 08:05:47 -08001873 if (transferStat) {
1874 UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
1875 }
1876
Yann Collet7aaac3f2019-11-25 10:35:36 -08001877 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08001878 if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
Yann Collet9012b6c2018-10-01 17:16:34 -07001879 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
1880 result=1;
1881 }
W. Felix Handtef746c372023-02-06 08:05:47 -08001882
W. Felix Handte1e3eba62023-01-17 15:08:15 -08001883 if (transferStat) {
W. Felix Handtef746c372023-02-06 08:05:47 -08001884 UTIL_utime(dstFileName, srcFileStat);
W. Felix Handte9cd6c1f2021-08-04 14:49:56 -04001885 }
W. Felix Handtef746c372023-02-06 08:05:47 -08001886
Yann Collet9012b6c2018-10-01 17:16:34 -07001887 if ( (result != 0) /* operation failure */
Yann Collet9012b6c2018-10-01 17:16:34 -07001888 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
1889 ) {
W. Felix Handteb02cdf62020-08-10 15:39:14 -04001890 FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
Yann Collet9012b6c2018-10-01 17:16:34 -07001891 }
1892 }
Ephraim Park5fe97422019-06-04 09:04:35 -07001893
Yann Collet9012b6c2018-10-01 17:16:34 -07001894 return result;
1895}
1896
Shashank Tavildar9ab6a742019-10-29 12:27:54 -07001897/* List used to compare file extensions (used with --exclude-compressed flag)
1898* Different from the suffixList and should only apply to ZSTD compress operationResult
1899*/
Shashank Tavildar0f2bff22019-10-28 18:21:47 -07001900static const char *compressedFileExtensions[] = {
1901 ZSTD_EXTENSION,
1902 TZSTD_EXTENSION,
1903 GZ_EXTENSION,
1904 TGZ_EXTENSION,
1905 LZMA_EXTENSION,
1906 XZ_EXTENSION,
1907 TXZ_EXTENSION,
1908 LZ4_EXTENSION,
1909 TLZ4_EXTENSION,
daniellerozenblit5a66afa2024-03-12 13:49:06 -04001910 ".7z",
1911 ".aa3",
1912 ".aac",
1913 ".aar",
1914 ".ace",
1915 ".alac",
1916 ".ape",
1917 ".apk",
1918 ".apng",
1919 ".arc",
1920 ".archive",
1921 ".arj",
1922 ".ark",
1923 ".asf",
1924 ".avi",
1925 ".avif",
1926 ".ba",
1927 ".br",
1928 ".bz2",
1929 ".cab",
1930 ".cdx",
1931 ".chm",
1932 ".cr2",
1933 ".divx",
1934 ".dmg",
1935 ".dng",
1936 ".docm",
1937 ".docx",
1938 ".dotm",
1939 ".dotx",
1940 ".dsft",
1941 ".ear",
1942 ".eftx",
1943 ".emz",
1944 ".eot",
1945 ".epub",
1946 ".f4v",
1947 ".flac",
1948 ".flv",
1949 ".gho",
1950 ".gif",
1951 ".gifv",
1952 ".gnp",
1953 ".iso",
1954 ".jar",
1955 ".jpeg",
1956 ".jpg",
1957 ".jxl",
1958 ".lz",
1959 ".lzh",
1960 ".m4a",
1961 ".m4v",
1962 ".mkv",
1963 ".mov",
1964 ".mp2",
1965 ".mp3",
1966 ".mp4",
1967 ".mpa",
1968 ".mpc",
1969 ".mpe",
1970 ".mpeg",
1971 ".mpg",
1972 ".mpl",
1973 ".mpv",
1974 ".msi",
1975 ".odp",
1976 ".ods",
1977 ".odt",
1978 ".ogg",
1979 ".ogv",
1980 ".otp",
1981 ".ots",
1982 ".ott",
1983 ".pea",
1984 ".png",
1985 ".pptx",
1986 ".qt",
1987 ".rar",
1988 ".s7z",
1989 ".sfx",
1990 ".sit",
1991 ".sitx",
1992 ".sqx",
1993 ".svgz",
1994 ".swf",
1995 ".tbz2",
1996 ".tib",
1997 ".tlz",
1998 ".vob",
1999 ".war",
2000 ".webm",
2001 ".webp",
2002 ".wma",
2003 ".wmv",
2004 ".woff",
2005 ".woff2",
2006 ".wvl",
2007 ".xlsx",
2008 ".xpi",
2009 ".xps",
2010 ".zip",
2011 ".zipx",
2012 ".zoo",
2013 ".zpaq",
Shashank Tavildar0f2bff22019-10-28 18:21:47 -07002014 NULL
2015};
Yann Collet9012b6c2018-10-01 17:16:34 -07002016
Yann Colletb71adf42016-07-02 01:05:31 +02002017/*! FIO_compressFilename_srcFile() :
Yann Collet459a6b72016-02-15 20:37:23 +01002018 * @return : 0 : compression completed correctly,
2019 * 1 : missing or pb opening srcFileName
2020 */
Yann Collet9012b6c2018-10-01 17:16:34 -07002021static int
senhuang4251234962020-09-07 13:13:05 -04002022FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
2023 FIO_prefs_t* const prefs,
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002024 cRess_t ress,
Yann Collet9012b6c2018-10-01 17:16:34 -07002025 const char* dstFileName,
2026 const char* srcFileName,
2027 int compressionLevel)
Yann Collet459a6b72016-02-15 20:37:23 +01002028{
2029 int result;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002030 FILE* srcFile;
W. Felix Handte2ad68552023-01-17 14:01:06 -08002031 stat_t srcFileStat;
Yonatan Komornik79bdb8c2023-02-02 15:19:22 -08002032 U64 fileSize = UTIL_FILESIZE_UNKNOWN;
Yann Collet7aaac3f2019-11-25 10:35:36 -08002033 DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
Yann Collet459a6b72016-02-15 20:37:23 +01002034
W. Felix Handte03820762023-01-17 14:50:31 -08002035 if (strcmp(srcFileName, stdinmark)) {
2036 if (UTIL_stat(srcFileName, &srcFileStat)) {
2037 /* failure to stat at all is handled during opening */
Przemyslaw Skibinski64fa2db2017-01-25 13:02:33 +01002038
W. Felix Handte03820762023-01-17 14:50:31 -08002039 /* ensure src is not a directory */
2040 if (UTIL_isDirectoryStat(&srcFileStat)) {
2041 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2042 return 1;
2043 }
2044
2045 /* ensure src is not the same as dict (if present) */
2046 if (ress.dictFileName != NULL && UTIL_isSameFileStat(srcFileName, ress.dictFileName, &srcFileStat, &ress.dictFileStat)) {
2047 DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
2048 return 1;
2049 }
2050 }
shakeelrao12909332019-03-23 21:53:13 -07002051 }
2052
Shashank Tavildar0f2bff22019-10-28 18:21:47 -07002053 /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
2054 * YES => ZSTD will skip compression of the file and will return 0.
2055 * NO => ZSTD will resume with compress operation.
2056 */
2057 if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
2058 DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
2059 return 0;
2060 }
2061
W. Felix Handte2ad68552023-01-17 14:01:06 -08002062 srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002063 if (srcFile == NULL) return 1; /* srcFile could not be opened */
Shashank Tavildar02433e02019-10-28 14:54:54 -07002064
Yonatan Komornik79bdb8c2023-02-02 15:19:22 -08002065 /* Don't use AsyncIO for small files */
2066 if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
2067 fileSize = UTIL_getFileSizeStat(&srcFileStat);
2068 if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
2069 AIO_ReadPool_setAsync(ress.readCtx, 0);
2070 AIO_WritePool_setAsync(ress.writeCtx, 0);
2071 } else {
2072 AIO_ReadPool_setAsync(ress.readCtx, 1);
2073 AIO_WritePool_setAsync(ress.writeCtx, 1);
2074 }
2075
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002076 AIO_ReadPool_setFile(ress.readCtx, srcFile);
W. Felix Handte5653f962023-01-17 14:05:15 -08002077 result = FIO_compressFilename_dstFile(
2078 fCtx, prefs, ress,
2079 dstFileName, srcFileName,
2080 &srcFileStat, compressionLevel);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002081 AIO_ReadPool_closeFile(ress.readCtx);
Yann Collet459a6b72016-02-15 20:37:23 +01002082
Yann Collet8c85b292023-01-23 18:55:51 -08002083 if ( prefs->removeSrcFile /* --rm */
2084 && result == 0 /* success */
2085 && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
Yann Collet9012b6c2018-10-01 17:16:34 -07002086 ) {
Nick Terrella6052af2017-11-17 16:38:56 -08002087 /* We must clear the handler, since after this point calling it would
2088 * delete both the source and destination files.
2089 */
2090 clearHandler();
W. Felix Handteb02cdf62020-08-10 15:39:14 -04002091 if (FIO_removeFile(srcFileName))
Yann Colletdccd6b62017-02-27 15:57:50 -08002092 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
2093 }
Yann Collet459a6b72016-02-15 20:37:23 +01002094 return result;
2095}
2096
/* checked_index() :
 * @return options[index], after asserting that index < length.
 * The bound is only enforced in builds where assert() is active.
 */
static const char*
checked_index(const char* options[], size_t length, size_t index)
{
    /* keeps `length` "used" in builds where the assert below is compiled out */
    (void) length;
    assert(index < length);
    return *(options + index);
}
2104
Yann Collet3dfcafa2022-08-03 21:39:35 +02002105#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index))
Kevin Svetlitski0665d4c2021-11-05 12:01:20 -07002106
Yann Collet8c85b292023-01-23 18:55:51 -08002107void FIO_displayCompressionParameters(const FIO_prefs_t* prefs)
2108{
Kevin Svetlitski0665d4c2021-11-05 12:01:20 -07002109 static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION,
2110 LZMA_EXTENSION, LZ4_EXTENSION};
2111 static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"};
2112 static const char* checkSumOptions[3] = {" --no-check", "", " --check"};
2113 static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"};
2114 static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"};
2115
2116 assert(g_display_prefs.displayLevel >= 4);
2117
2118 DISPLAY("--format=%s", formatOptions[prefs->compressionType]);
2119 DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport));
2120 DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID");
2121 DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag));
2122 DISPLAY(" --block-size=%d", prefs->blockSize);
2123 if (prefs->adaptiveMode)
2124 DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel);
2125 DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder));
2126 DISPLAY("%s", prefs->rsyncable ? " --rsyncable" : "");
2127 if (prefs->streamSrcSize)
Kevin Svetlitski375e3aa2021-11-11 13:17:30 -08002128 DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize);
Kevin Svetlitski0665d4c2021-11-05 12:01:20 -07002129 if (prefs->srcSizeHint)
2130 DISPLAY(" --size-hint=%d", prefs->srcSizeHint);
2131 if (prefs->targetCBlockSize)
Kevin Svetlitski375e3aa2021-11-11 13:17:30 -08002132 DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize);
Kevin Svetlitski0665d4c2021-11-05 12:01:20 -07002133 DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode));
2134 DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB);
2135 DISPLAY(" --threads=%d", prefs->nbWorkers);
2136 DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : "");
2137 DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-");
2138 DISPLAY("\n");
2139}
2140
2141#undef INDEX
2142
senhuang4251234962020-09-07 13:13:05 -04002143int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
Sen Huang7f98b462019-09-05 16:03:35 -07002144 const char* srcFileName, const char* dictFileName,
Yann Collet458a1a12020-04-13 10:13:29 -07002145 int compressionLevel, ZSTD_compressionParameters comprParams)
Yann Collet4856a002015-01-24 01:58:16 +01002146{
daniellerozenblitb2ad17a2023-03-28 19:44:53 -04002147 cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
senhuang4251234962020-09-07 13:13:05 -04002148 int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
Yann Collet9d909222015-12-17 14:09:55 +01002149
Xin Xie9a8ccd42020-06-19 19:35:51 -07002150#define DISPLAY_LEVEL_DEFAULT 2
Yann Colletb71adf42016-07-02 01:05:31 +02002151
senhuang42043b9342020-10-14 20:19:46 -04002152 FIO_freeCResources(&ress);
Yann Colletb71adf42016-07-02 01:05:31 +02002153 return result;
Yann Collet4856a002015-01-24 01:58:16 +01002154}
2155
Yann Collet9012b6c2018-10-01 17:16:34 -07002156/* FIO_determineCompressedName() :
2157 * create a destination filename for compressed srcFileName.
2158 * @return a pointer to it.
2159 * This function never returns an error (it may abort() in case of pb)
2160 */
2161static const char*
Sen Huang64bc4412019-10-03 13:53:04 -04002162FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
Yann Collet9012b6c2018-10-01 17:16:34 -07002163{
2164 static size_t dfnbCapacity = 0;
2165 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
Sen Huang64bc4412019-10-03 13:53:04 -04002166 char* outDirFilename = NULL;
2167 size_t sfnSize = strlen(srcFileName);
W. Felix Handte91c3f542019-10-24 20:18:57 -04002168 size_t const srcSuffixLen = strlen(suffix);
Yonatan Komornikae467042022-07-29 16:13:07 -07002169
2170 if(!strcmp(srcFileName, stdinmark)) {
2171 return stdoutmark;
2172 }
2173
Sen Huang64bc4412019-10-03 13:53:04 -04002174 if (outDirName) {
W. Felix Handte91c3f542019-10-24 20:18:57 -04002175 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
Sen Huang64bc4412019-10-03 13:53:04 -04002176 sfnSize = strlen(outDirFilename);
2177 assert(outDirFilename != NULL);
2178 }
Yann Collet9012b6c2018-10-01 17:16:34 -07002179
W. Felix Handte91c3f542019-10-24 20:18:57 -04002180 if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
Yann Collet433059b2018-10-10 17:06:25 -07002181 /* resize buffer for dstName */
Yann Collet9012b6c2018-10-01 17:16:34 -07002182 free(dstFileNameBuffer);
W. Felix Handte91c3f542019-10-24 20:18:57 -04002183 dfnbCapacity = sfnSize + srcSuffixLen + 30;
Yann Collet9012b6c2018-10-01 17:16:34 -07002184 dstFileNameBuffer = (char*)malloc(dfnbCapacity);
2185 if (!dstFileNameBuffer) {
2186 EXM_THROW(30, "zstd: %s", strerror(errno));
Sen Huang64bc4412019-10-03 13:53:04 -04002187 }
2188 }
Yann Collet3ca62612018-10-02 15:59:11 -07002189 assert(dstFileNameBuffer != NULL);
Yann Collet9012b6c2018-10-01 17:16:34 -07002190
Sen Huang64bc4412019-10-03 13:53:04 -04002191 if (outDirFilename) {
2192 memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
2193 free(outDirFilename);
2194 } else {
2195 memcpy(dstFileNameBuffer, srcFileName, sfnSize);
2196 }
W. Felix Handte91c3f542019-10-24 20:18:57 -04002197 memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
Yann Collet9012b6c2018-10-01 17:16:34 -07002198 return dstFileNameBuffer;
2199}
2200
/* FIO_getLargestFileSize() :
 * @return the largest value reported by UTIL_getFileSize() across
 *         the first `nbFiles` entries of `inFileNames` (0 when nbFiles == 0).
 */
static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
    unsigned long long largest = 0;
    size_t n;
    for (n = 0; n < nbFiles; n++) {
        unsigned long long const current = UTIL_getFileSize(inFileNames[n]);
        if (current > largest) largest = current;
    }
    return largest;
}
Yann Collet9012b6c2018-10-01 17:16:34 -07002211
2212/* FIO_compressMultipleFilenames() :
2213 * compress nbFiles files
Sen Huang7f98b462019-09-05 16:03:35 -07002214 * into either one destination (outFileName),
2215 * or into one file each (outFileName == NULL, but suffix != NULL),
2216 * or into a destination folder (specified with -O)
Yann Collet9012b6c2018-10-01 17:16:34 -07002217 */
senhuang4251234962020-09-07 13:13:05 -04002218int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
2219 FIO_prefs_t* const prefs,
senhuang42b6abbc32020-08-26 11:35:07 -04002220 const char** inFileNamesTable,
Xin Xie9a8ccd42020-06-19 19:35:51 -07002221 const char* outMirroredRootDirName,
Yann Collet17951332019-10-17 15:32:03 -07002222 const char* outDirName,
Nick Terrell4680e852017-12-12 18:32:50 -08002223 const char* outFileName, const char* suffix,
Przemyslaw Skibinski8349d672016-12-13 13:24:59 +01002224 const char* dictFileName, int compressionLevel,
Yann Collet33f77092018-08-13 13:02:03 -07002225 ZSTD_compressionParameters comprParams)
Yann Collet4f137032015-12-17 02:23:58 +01002226{
senhuang42202b2952020-09-03 09:28:40 -04002227 int status;
Yann Collet9012b6c2018-10-01 17:16:34 -07002228 int error = 0;
Bimba Shresthaf25a6e92020-01-10 14:25:24 -08002229 cRess_t ress = FIO_createCResources(prefs, dictFileName,
Yann Collet0d793a62021-01-06 01:35:52 -08002230 FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
Bimba Shresthaf25a6e92020-01-10 14:25:24 -08002231 compressionLevel, comprParams);
Yann Collet4f137032015-12-17 02:23:58 +01002232
2233 /* init */
Yann Collet9012b6c2018-10-01 17:16:34 -07002234 assert(outFileName != NULL || suffix != NULL);
Yann Collet9012b6c2018-10-01 17:16:34 -07002235 if (outFileName != NULL) { /* output into a single destination (stdout typically) */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002236 FILE *dstFile;
Yann Collet8c85b292023-01-23 18:55:51 -08002237 if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
senhuang42043b9342020-10-14 20:19:46 -04002238 FIO_freeCResources(&ress);
senhuang427991c552020-08-26 16:50:20 -04002239 return 1;
senhuang427e867ad2020-08-26 18:52:32 -04002240 }
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002241 dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
2242 if (dstFile == NULL) { /* could not open outFileName */
Yann Collet9012b6c2018-10-01 17:16:34 -07002243 error = 1;
Pádraig Bradye0596712018-01-02 15:17:32 +00002244 } else {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002245 AIO_WritePool_setFile(ress.writeCtx, dstFile);
senhuang42a6414f12020-09-01 12:32:18 -04002246 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
senhuang4251234962020-09-07 13:13:05 -04002247 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
senhuang427842f432020-09-03 09:22:07 -04002248 if (!status) fCtx->nbFilesProcessed++;
senhuang42a6414f12020-09-01 12:32:18 -04002249 error |= status;
senhuang42da388912020-08-25 16:46:47 -04002250 }
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002251 if (AIO_WritePool_closeFile(ress.writeCtx))
Yann Collet0f2d4432018-12-19 17:25:58 -08002252 EXM_THROW(29, "Write error (%s) : cannot properly close %s",
2253 strerror(errno), outFileName);
Pádraig Bradye0596712018-01-02 15:17:32 +00002254 }
Yann Collet459a6b72016-02-15 20:37:23 +01002255 } else {
Xin Xie9a8ccd42020-06-19 19:35:51 -07002256 if (outMirroredRootDirName)
Yann Collet0d793a62021-01-06 01:35:52 -08002257 UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
Xin Xie9a8ccd42020-06-19 19:35:51 -07002258
senhuang42a6414f12020-09-01 12:32:18 -04002259 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
senhuang42d54566f2020-08-28 11:01:04 -04002260 const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
Xin Xie9a8ccd42020-06-19 19:35:51 -07002261 const char* dstFileName = NULL;
2262 if (outMirroredRootDirName) {
2263 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
2264 if (validMirroredDirName) {
2265 dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
2266 free(validMirroredDirName);
2267 } else {
2268 DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
2269 error=1;
2270 continue;
2271 }
2272 } else {
2273 dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
2274 }
senhuang4251234962020-09-07 13:13:05 -04002275 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
senhuang427842f432020-09-03 09:22:07 -04002276 if (!status) fCtx->nbFilesProcessed++;
senhuang42a6414f12020-09-01 12:32:18 -04002277 error |= status;
Sen Huang64bc4412019-10-03 13:53:04 -04002278 }
Xin Xie9a8ccd42020-06-19 19:35:51 -07002279
Sen Huangc5ebb372019-10-09 09:39:52 -04002280 if (outDirName)
Yann Collet0d793a62021-01-06 01:35:52 -08002281 FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
Sen Huang64bc4412019-10-03 13:53:04 -04002282 }
Yann Collet4f137032015-12-17 02:23:58 +01002283
Nick Terrellfbff7822022-01-07 15:07:28 -08002284 if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
W. Felix Handtebbb81c82021-06-09 13:05:44 -04002285 UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
2286 UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);
Scott Baker77001f02021-06-04 22:21:00 -07002287
Nick Terrellfbff7822022-01-07 15:07:28 -08002288 DISPLAY_PROGRESS("\r%79s\r", "");
2289 if (fCtx->totalBytesInput == 0) {
2290 DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n",
2291 fCtx->nbFilesProcessed,
2292 hr_isize.precision, hr_isize.value, hr_isize.suffix,
2293 hr_osize.precision, hr_osize.value, hr_osize.suffix);
2294 } else {
2295 DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n",
2296 fCtx->nbFilesProcessed,
2297 (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
2298 hr_isize.precision, hr_isize.value, hr_isize.suffix,
2299 hr_osize.precision, hr_osize.value, hr_osize.suffix);
2300 }
senhuang42ab0d3322020-09-15 15:53:32 -04002301 }
Yann Collet4f137032015-12-17 02:23:58 +01002302
senhuang42043b9342020-10-14 20:19:46 -04002303 FIO_freeCResources(&ress);
Yann Collet9012b6c2018-10-01 17:16:34 -07002304 return error;
Yann Collet4f137032015-12-17 02:23:58 +01002305}
2306
Yann Colletf8494622016-05-07 22:43:40 +02002307#endif /* #ifndef ZSTD_NOCOMPRESS */
inikep3c7c3522016-04-22 13:59:05 +02002308
Yann Collet4f137032015-12-17 02:23:58 +01002309
inikepdb396432016-04-22 18:22:30 +02002310
2311#ifndef ZSTD_NODECOMPRESS
2312
/* **************************************************************************
 *  Decompression
 ***************************************************************************/
Yann Colletdeb078b2015-12-17 20:30:14 +01002316typedef struct {
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08002317 FIO_Dict_t dict;
Yann Collet6263ba52016-08-13 23:45:45 +02002318 ZSTD_DStream* dctx;
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002319 WritePoolCtx_t *writeCtx;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002320 ReadPoolCtx_t *readCtx;
Yann Colletdeb078b2015-12-17 20:30:14 +01002321} dRess_t;
2322
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002323static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
Yann Colletdeb078b2015-12-17 20:30:14 +01002324{
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08002325 int useMMap = prefs->mmapDict == ZSTD_ps_enable;
2326 int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
Danielle Rozenblit8a189b12023-02-13 15:23:06 -08002327 stat_t statbuf;
Yann Colletdeb078b2015-12-17 20:30:14 +01002328 dRess_t ress;
Gianfranco Costamagnade6b46d2023-07-07 09:26:30 +02002329 memset(&statbuf, 0, sizeof(statbuf));
Yann Collet5e80dd32016-07-13 17:38:39 +02002330 memset(&ress, 0, sizeof(ress));
Yann Colletdeb078b2015-12-17 20:30:14 +01002331
Danielle Rozenblit8a189b12023-02-13 15:23:06 -08002332 FIO_getDictFileStat(dictFileName, &statbuf);
2333
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -08002334 if (prefs->patchFromMode){
Danielle Rozenblit8a189b12023-02-13 15:23:06 -08002335 U64 const dictSize = UTIL_getFileSizeStat(&statbuf);
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08002336 useMMap |= dictSize > prefs->memLimit;
Danielle Rozenblit610c8b92023-02-09 07:37:37 -08002337 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */);
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -08002338 }
Bimba Shrestha659ff852020-04-21 21:12:50 -05002339
Yann Collet5e80dd32016-07-13 17:38:39 +02002340 /* Allocation */
Yann Collet6263ba52016-08-13 23:45:45 +02002341 ress.dctx = ZSTD_createDStream();
Yann Collet0f2d4432018-12-19 17:25:58 -08002342 if (ress.dctx==NULL)
2343 EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002344 CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
senhuang42a0305602020-08-24 17:28:00 -04002345 CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
Yann Collet0d793a62021-01-06 01:35:52 -08002346
Yann Colletdeb078b2015-12-17 20:30:14 +01002347 /* dictionary */
daniellerozenblitb2ad17a2023-03-28 19:44:53 -04002348 {
2349 FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
2350 FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType);
Danielle Rozenblit610c8b92023-02-09 07:37:37 -08002351
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -08002352 CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
Danielle Rozenblit610c8b92023-02-09 07:37:37 -08002353
Danielle Rozenblit4373c5a2023-02-13 07:26:22 -08002354 if (prefs->patchFromMode){
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08002355 CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
Danielle Rozenblit610c8b92023-02-09 07:37:37 -08002356 } else {
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08002357 CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
Danielle Rozenblit610c8b92023-02-09 07:37:37 -08002358 }
Yann Collet3ecbe6a2016-09-14 17:26:59 +02002359 }
Yann Colletdeb078b2015-12-17 20:30:14 +01002360
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002361 ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002362 ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
Yann Colletdeb078b2015-12-17 20:30:14 +01002363 return ress;
2364}
2365
2366static void FIO_freeDResources(dRess_t ress)
2367{
Danielle Rozenblit96e55c12023-03-08 08:06:10 -08002368 FIO_freeDict(&(ress.dict));
Yann Collet6d4fef32017-05-17 18:36:15 -07002369 CHECK( ZSTD_freeDStream(ress.dctx) );
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002370 AIO_WritePool_free(ress.writeCtx);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002371 AIO_ReadPool_free(ress.readCtx);
Yonatan Komornik1598e6c2022-01-21 13:55:41 -08002372}
Yann Collet4f137032015-12-17 02:23:58 +01002373
Yann Collet5bf13592023-03-31 11:13:52 -07002374/* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
2375 * @return : 0 (no error) */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002376static int FIO_passThrough(dRess_t *ress)
Yann Colletde95f962016-05-23 19:46:47 +02002377{
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002378 size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize());
2379 IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
2380 AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
Yann Colletde95f962016-05-23 19:46:47 +02002381
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002382 while(ress->readCtx->srcBufferLoaded) {
2383 size_t writeSize;
2384 writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
2385 assert(writeSize <= writeJob->bufferSize);
2386 memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize);
2387 writeJob->usedBufferSize = writeSize;
2388 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
2389 AIO_ReadPool_consumeBytes(ress->readCtx, writeSize);
2390 AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
Yann Colletde95f962016-05-23 19:46:47 +02002391 }
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002392 assert(ress->readCtx->reachedEof);
2393 AIO_WritePool_releaseIoJob(writeJob);
2394 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
Yann Colletde95f962016-05-23 19:46:47 +02002395 return 0;
2396}
2397
Yann Collet2bfc79a2018-02-01 16:13:04 -08002398/* FIO_zstdErrorHelp() :
2399 * detailed error message when requested window size is too large */
Yann Collet0ee36092019-10-17 16:09:53 -07002400static void
2401FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
2402 const dRess_t* ress,
Yann Collet5bf13592023-03-31 11:13:52 -07002403 size_t err,
2404 const char* srcFileName)
Nick Terrellc233bdb2017-09-22 14:04:39 -07002405{
2406 ZSTD_frameHeader header;
Yann Collet2bfc79a2018-02-01 16:13:04 -08002407
2408 /* Help message only for one specific error */
2409 if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
Nick Terrellc233bdb2017-09-22 14:04:39 -07002410 return;
Yann Collet2bfc79a2018-02-01 16:13:04 -08002411
Nick Terrellc233bdb2017-09-22 14:04:39 -07002412 /* Try to decode the frame header */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002413 err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded);
Yann Collet2bfc79a2018-02-01 16:13:04 -08002414 if (err == 0) {
Yann Collet6c492af2018-02-01 20:16:00 -08002415 unsigned long long const windowSize = header.windowSize;
Yann Colletededcfc2018-12-21 16:19:44 -08002416 unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002417 assert(prefs->memLimit > 0);
Yann Collet458a1a12020-04-13 10:13:29 -07002418 DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002419 srcFileName, windowSize, prefs->memLimit);
Nick Terrellc233bdb2017-09-22 14:04:39 -07002420 if (windowLog <= ZSTD_WINDOWLOG_MAX) {
Yann Colletededcfc2018-12-21 16:19:44 -08002421 unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
Yann Colletb1407f92018-10-03 12:43:59 -07002422 assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
Yann Collet458a1a12020-04-13 10:13:29 -07002423 DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
Nick Terrellc233bdb2017-09-22 14:04:39 -07002424 srcFileName, windowLog, windowMB);
2425 return;
Yann Collet458a1a12020-04-13 10:13:29 -07002426 } }
2427 DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
Nick Terrellc233bdb2017-09-22 14:04:39 -07002428 srcFileName, ZSTD_WINDOWLOG_MAX);
2429}
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002430
2431/** FIO_decompressFrame() :
Yann Colletc34185d2017-07-03 10:27:16 -07002432 * @return : size of decoded zstd frame, or an error code
Yann Collet458a1a12020-04-13 10:13:29 -07002433 */
Yann Colletc34185d2017-07-03 10:27:16 -07002434#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
Yann Collet0ee36092019-10-17 16:09:53 -07002435static unsigned long long
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002436FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
Yann Collet458a1a12020-04-13 10:13:29 -07002437 const FIO_prefs_t* const prefs,
2438 const char* srcFileName,
2439 U64 alreadyDecoded) /* for multi-frames streams */
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002440{
2441 U64 frameSize = 0;
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002442 IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002443
Yann Collet83ec3d02024-03-12 11:27:42 -07002444 /* display last 20 characters only when not --verbose */
Yann Collet458a1a12020-04-13 10:13:29 -07002445 { size_t const srcFileLength = strlen(srcFileName);
Yann Collet83ec3d02024-03-12 11:27:42 -07002446 if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
2447 srcFileName += srcFileLength-20;
Yann Collet458a1a12020-04-13 10:13:29 -07002448 }
Yann Collet8afcc802017-09-29 15:54:09 -07002449
Stephen Kittadb54292021-02-20 17:28:19 +01002450 ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002451
Nick Terrellc233bdb2017-09-22 14:04:39 -07002452 /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002453 AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX);
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002454
2455 /* Main decompression Loop */
2456 while (1) {
Yann Colletea684c32023-01-18 15:38:36 -08002457 ZSTD_inBuffer inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 );
2458 ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002459 size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
W. Felix Handte87e94e32021-06-10 12:31:42 -04002460 UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
Yann Collet01a1abf2017-05-05 19:15:24 -07002461 if (ZSTD_isError(readSizeHint)) {
2462 DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
2463 srcFileName, ZSTD_getErrorName(readSizeHint));
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002464 FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002465 AIO_WritePool_releaseIoJob(writeJob);
Yann Colletc34185d2017-07-03 10:27:16 -07002466 return FIO_ERROR_FRAME_DECODING;
Yann Collet01a1abf2017-05-05 19:15:24 -07002467 }
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002468
2469 /* Write block */
Yonatan Komornik1598e6c2022-01-21 13:55:41 -08002470 writeJob->usedBufferSize = outBuff.pos;
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002471 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002472 frameSize += outBuff.pos;
Binh Vod2f31b62021-06-07 11:50:22 -04002473 if (fCtx->nbFilesTotal > 1) {
2474 size_t srcFileNameSize = strlen(srcFileName);
2475 if (srcFileNameSize > 18) {
2476 const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
Nick Terrellfbff7822022-01-07 15:07:28 -08002477 DISPLAYUPDATE_PROGRESS(
2478 "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ",
2479 fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
senhuang421d5c6fd2020-09-16 10:28:45 -04002480 } else {
Nick Terrellfbff7822022-01-07 15:07:28 -08002481 DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
W. Felix Handte87e94e32021-06-10 12:31:42 -04002482 fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
senhuang421d5c6fd2020-09-16 10:28:45 -04002483 }
Binh Vod2f31b62021-06-07 11:50:22 -04002484 } else {
Nick Terrellfbff7822022-01-07 15:07:28 -08002485 DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ",
W. Felix Handte87e94e32021-06-10 12:31:42 -04002486 srcFileName, hrs.precision, hrs.value, hrs.suffix);
senhuang42b6abbc32020-08-26 11:35:07 -04002487 }
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002488
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002489 AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos);
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002490
2491 if (readSizeHint == 0) break; /* end of frame */
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002492
2493 /* Fill input buffer */
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002494 { size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */
2495 if (ress->readCtx->srcBufferLoaded < toDecode) {
2496 size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode);
Yann Collet8afcc802017-09-29 15:54:09 -07002497 if (readSize==0) {
2498 DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002499 srcFileName);
2500 AIO_WritePool_releaseIoJob(writeJob);
Yann Collet8afcc802017-09-29 15:54:09 -07002501 return FIO_ERROR_FRAME_DECODING;
2502 }
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002503 } } }
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002504
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002505 AIO_WritePool_releaseIoJob(writeJob);
2506 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
Przemyslaw Skibinskib493e3b2016-12-05 17:39:38 +01002507
2508 return frameSize;
2509}
2510
2511
#ifdef ZSTD_GZDECOMPRESS
/* FIO_decompressGzFrame() :
 * Inflates one gzip stream, starting from the data currently held by
 * ress->readCtx, and enqueues the produced blocks into ress->writeCtx.
 * @return : nb of bytes produced, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName)
{
    ReadPoolCtx_t* const readCtx = ress->readCtx;
    unsigned long long outFileSize = 0;
    z_stream strm;
    int flush = Z_NO_FLUSH;
    int decodingError = 0;
    int ret = Z_OK;
    IOJob_t* writeJob = NULL;

    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.next_in = 0;
    strm.avail_in = 0;
    /* see https://www.zlib.net/manual.html */
    if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) {
        return FIO_ERROR_FRAME_DECODING;
    }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (Bytef*)writeJob->buffer;
    strm.avail_out = (uInt)writeJob->bufferSize;
    strm.avail_in = (uInt)readCtx->srcBufferLoaded;
    strm.next_in = (z_const unsigned char*)readCtx->srcBuffer;

    do {
        if (strm.avail_in == 0) {
            /* input exhausted : fetch more, and request Z_FINISH once none is left */
            AIO_ReadPool_consumeAndRefill(readCtx);
            if (readCtx->srcBufferLoaded == 0) flush = Z_FINISH;
            strm.next_in = (z_const unsigned char*)readCtx->srcBuffer;
            strm.avail_in = (uInt)readCtx->srcBufferLoaded;
        }

        ret = inflate(&strm, flush);
        if (ret == Z_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
            decodingError = 1;
            break;
        }
        if ((ret != Z_OK) && (ret != Z_STREAM_END)) {
            DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
            decodingError = 1;
            break;
        }

        {   size_t const producedBytes = writeJob->bufferSize - strm.avail_out;
            if (producedBytes != 0) {
                writeJob->usedBufferSize = producedBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += producedBytes;
                /* point zlib at the newly acquired job */
                strm.next_out = (Bytef*)writeJob->buffer;
                strm.avail_out = (uInt)writeJob->bufferSize;
            }
        }
    } while (ret != Z_STREAM_END);

    /* only the bytes actually used by zlib are consumed from the read pool */
    AIO_ReadPool_consumeBytes(readCtx, readCtx->srcBufferLoaded - strm.avail_in);

    if ( (inflateEnd(&strm) != Z_OK)  /* release resources ; error detected */
      && (decodingError==0) ) {
        DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
        decodingError = 1;
    }
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
2578
#ifdef ZSTD_LZMADECOMPRESS
/* FIO_decompressLzmaFrame() :
 * Decodes one xz stream, or one legacy .lzma stream when @plain_lzma is
 * non-zero, starting from the data currently held by ress->readCtx,
 * and enqueues the produced blocks into ress->writeCtx.
 * @return : nb of bytes produced, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressLzmaFrame(dRess_t* ress,
                        const char* srcFileName, int plain_lzma)
{
    unsigned long long outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret initRet;
    int decodingError = 0;
    IOJob_t *writeJob = NULL;

    strm.next_in = 0;
    strm.avail_in = 0;
    if (plain_lzma) {
        initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
    } else {
        initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
    }

    if (initRet != LZMA_OK) {
        /* file name first, then failing function : same layout as the other messages */
        DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
                        srcFileName,
                        plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
                        (int)initRet);
        return FIO_ERROR_FRAME_DECODING;
    }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (BYTE*)writeJob->buffer;
    strm.avail_out = writeJob->bufferSize;
    strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
    strm.avail_in = ress->readCtx->srcBufferLoaded;

    for ( ; ; ) {
        lzma_ret ret;
        if (strm.avail_in == 0) {
            /* input exhausted : fetch more, and request LZMA_FINISH once none is left */
            AIO_ReadPool_consumeAndRefill(ress->readCtx);
            if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
            strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
            strm.avail_in = ress->readCtx->srcBufferLoaded;
        }
        ret = lzma_code(&strm, action);

        if (ret == LZMA_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
            decodingError = 1; break;
        }
        if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
            DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
                            srcFileName, (int)ret);
            decodingError = 1; break;
        }
        {   size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
            if (decompBytes) {
                writeJob->usedBufferSize = decompBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += decompBytes;
                /* point liblzma at the newly acquired job */
                strm.next_out = (BYTE*)writeJob->buffer;
                strm.avail_out = writeJob->bufferSize;
        }   }
        if (ret == LZMA_STREAM_END) break;
    }

    /* only the bytes actually used by liblzma are consumed from the read pool */
    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
    lzma_end(&strm);
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
2649
#ifdef ZSTD_LZ4DECOMPRESS
/* FIO_decompressLz4Frame() :
 * Decodes one lz4 frame, starting from the data currently held by
 * ress->readCtx, and enqueues the produced blocks into ress->writeCtx.
 * The amount of input requested at each round follows the hint returned
 * by LZ4F_decompress().
 * @return : nb of bytes produced, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName)
{
    ReadPoolCtx_t* const readCtx = ress->readCtx;
    unsigned long long filesize = 0;
    LZ4F_errorCode_t nextToLoad = 4;
    LZ4F_decompressionContext_t dCtx;
    LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
    int decodingError = 0;
    IOJob_t* writeJob = NULL;

    if (LZ4F_isError(errorCode)) {
        DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
        return FIO_ERROR_FRAME_DECODING;
    }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);

    /* Main Loop */
    while (nextToLoad != 0) {
        size_t pos = 0;
        int fullBufferDecoded = 0;

        /* Read input */
        AIO_ReadPool_fillBuffer(readCtx, nextToLoad);
        if (readCtx->srcBufferLoaded == 0) break;   /* reached end of file */

        /* still to read, or still to flush */
        while ((pos < readCtx->srcBufferLoaded) || fullBufferDecoded) {
            /* Decode Input (at least partially) */
            size_t remaining = readCtx->srcBufferLoaded - pos;   /* in : available ; out : consumed */
            size_t decodedBytes = writeJob->bufferSize;          /* in : capacity ; out : produced */
            nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes,
                                         (char*)(readCtx->srcBuffer)+pos, &remaining, NULL);
            if (LZ4F_isError(nextToLoad)) {
                DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
                                srcFileName, LZ4F_getErrorName(nextToLoad));
                decodingError = 1;
                nextToLoad = 0;   /* also terminates the main loop */
                break;
            }
            pos += remaining;
            assert(pos <= readCtx->srcBufferLoaded);
            /* a completely filled output buffer means more may be waiting to flush */
            fullBufferDecoded = (decodedBytes == writeJob->bufferSize);

            /* Write Block */
            if (decodedBytes != 0) {
                UTIL_HumanReadableSize_t hrs;
                writeJob->usedBufferSize = decodedBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                filesize += decodedBytes;
                hrs = UTIL_makeHumanReadableSize(filesize);
                DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix);
            }

            if (nextToLoad == 0) break;
        }
        AIO_ReadPool_consumeBytes(readCtx, pos);
    }
    if (nextToLoad != 0) {
        DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
        decodingError = 1;
    }

    LZ4F_freeDecompressionContext(dCtx);
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);

    return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
}
#endif
2719
2720
Przemyslaw Skibinski19aad422016-12-01 11:56:31 +01002721
Yann Colletc34185d2017-07-03 10:27:16 -07002722/** FIO_decompressFrames() :
2723 * Find and decode frames inside srcFile
2724 * srcFile presumed opened and valid
2725 * @return : 0 : OK
2726 * 1 : error
2727 */
senhuang4251234962020-09-07 13:13:05 -04002728static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002729 dRess_t ress, const FIO_prefs_t* const prefs,
2730 const char* dstFileName, const char* srcFileName)
Yann Colletc34185d2017-07-03 10:27:16 -07002731{
2732 unsigned readSomething = 0;
2733 unsigned long long filesize = 0;
Nick Terrell03cc84f2022-08-04 17:15:59 -07002734 int passThrough = prefs->passThrough;
2735
2736 if (passThrough == -1) {
2737 /* If pass-through mode is not explicitly enabled or disabled,
2738 * default to the legacy behavior of enabling it if we are writing
2739 * to stdout with the overwrite flag enabled.
2740 */
2741 passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark);
2742 }
2743 assert(passThrough == 0 || passThrough == 1);
Yann Colletc34185d2017-07-03 10:27:16 -07002744
2745 /* for each frame */
2746 for ( ; ; ) {
2747 /* check magic number -> version */
2748 size_t const toRead = 4;
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002749 const BYTE* buf;
2750 AIO_ReadPool_fillBuffer(ress.readCtx, toRead);
2751 buf = (const BYTE*)ress.readCtx->srcBuffer;
2752 if (ress.readCtx->srcBufferLoaded==0) {
Yann Colletb8280fe2017-07-03 15:14:55 -07002753 if (readSomething==0) { /* srcFile is empty (which is invalid) */
Yann Colletc34185d2017-07-03 10:27:16 -07002754 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
2755 return 1;
Yann Colletb8280fe2017-07-03 15:14:55 -07002756 } /* else, just reached frame boundary */
Yann Colletc34185d2017-07-03 10:27:16 -07002757 break; /* no more input */
2758 }
2759 readSomething = 1; /* there is at least 1 byte in srcFile */
Chris Burgess2b9fde92022-07-29 15:22:46 -04002760 if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */
Nick Terrell03cc84f2022-08-04 17:15:59 -07002761 if (passThrough) {
Chris Burgess2b9fde92022-07-29 15:22:46 -04002762 return FIO_passThrough(&ress);
2763 }
Yann Colletc34185d2017-07-03 10:27:16 -07002764 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
2765 return 1;
2766 }
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002767 if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) {
2768 unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize);
Yann Colletb8280fe2017-07-03 15:14:55 -07002769 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2770 filesize += frameSize;
2771 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
Yann Colletc34185d2017-07-03 10:27:16 -07002772#ifdef ZSTD_GZDECOMPRESS
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002773 unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName);
Yann Collet368b9742017-07-03 13:47:46 -07002774 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2775 filesize += frameSize;
Yann Colletc34185d2017-07-03 10:27:16 -07002776#else
2777 DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
2778 return 1;
2779#endif
2780 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
2781 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
2782#ifdef ZSTD_LZMADECOMPRESS
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002783 unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD);
Yann Colletc34185d2017-07-03 10:27:16 -07002784 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2785 filesize += frameSize;
2786#else
2787 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
2788 return 1;
2789#endif
2790 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
2791#ifdef ZSTD_LZ4DECOMPRESS
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002792 unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName);
Yann Collete97ff3b2017-07-03 11:27:29 -07002793 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2794 filesize += frameSize;
Yann Colletc34185d2017-07-03 10:27:16 -07002795#else
2796 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
2797 return 1;
2798#endif
Nick Terrell03cc84f2022-08-04 17:15:59 -07002799 } else if (passThrough) {
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002800 return FIO_passThrough(&ress);
Yann Colletc34185d2017-07-03 10:27:16 -07002801 } else {
2802 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
2803 return 1;
2804 } } /* for each frame */
2805
2806 /* Final Status */
senhuang4228a9dc72020-09-03 20:23:30 -04002807 fCtx->totalBytesOutput += (size_t)filesize;
Nick Terrellfbff7822022-01-07 15:07:28 -08002808 DISPLAY_PROGRESS("\r%79s\r", "");
2809 if (FIO_shouldDisplayFileSummary(fCtx))
2810 DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize);
Yann Colletc34185d2017-07-03 10:27:16 -07002811
2812 return 0;
2813}
2814
Yann Colletc7bd6a42018-10-01 14:04:00 -07002815/** FIO_decompressDstFile() :
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002816 open `dstFileName`, or pass-through if writeCtx's file is already != 0,
Yann Colletc7bd6a42018-10-01 14:04:00 -07002817 then start decompression process (FIO_decompressFrames()).
2818 @return : 0 : OK
2819 1 : operation aborted
2820*/
senhuang4251234962020-09-07 13:13:05 -04002821static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
2822 FIO_prefs_t* const prefs,
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002823 dRess_t ress,
W. Felix Handte5653f962023-01-17 14:05:15 -08002824 const char* dstFileName,
2825 const char* srcFileName,
2826 const stat_t* srcFileStat)
Yann Colletc7bd6a42018-10-01 14:04:00 -07002827{
2828 int result;
Yann Colletc7bd6a42018-10-01 14:04:00 -07002829 int releaseDstFile = 0;
W. Felix Handte1e3eba62023-01-17 15:08:15 -08002830 int transferStat = 0;
W. Felix Handtef746c372023-02-06 08:05:47 -08002831 int dstFd = 0;
Yann Colletc7bd6a42018-10-01 14:04:00 -07002832
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002833 if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) {
Yonatan Komornik1598e6c2022-01-21 13:55:41 -08002834 FILE *dstFile;
W. Felix Handteb87f97b2021-03-08 17:39:14 -05002835 int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
2836 if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
Mike Gilbert57a86d92022-01-13 16:47:18 -05002837 && strcmp(dstFileName, stdoutmark)
W. Felix Handtea5ed28f2023-01-17 14:08:22 -08002838 && UTIL_isRegularFileStat(srcFileStat) ) {
W. Felix Handte1e3eba62023-01-17 15:08:15 -08002839 transferStat = 1;
2840 dstFilePermissions = TEMPORARY_FILE_PERMISSIONS;
W. Felix Handteb87f97b2021-03-08 17:39:14 -05002841 }
2842
Yann Colletc7bd6a42018-10-01 14:04:00 -07002843 releaseDstFile = 1;
2844
Yonatan Komornik1598e6c2022-01-21 13:55:41 -08002845 dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
2846 if (dstFile==NULL) return 1;
W. Felix Handtef746c372023-02-06 08:05:47 -08002847 dstFd = fileno(dstFile);
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002848 AIO_WritePool_setFile(ress.writeCtx, dstFile);
Yann Colletc7bd6a42018-10-01 14:04:00 -07002849
2850 /* Must only be added after FIO_openDstFile() succeeds.
2851 * Otherwise we may delete the destination file if it already exists,
2852 * and the user presses Ctrl-C when asked if they wish to overwrite.
2853 */
2854 addHandler(dstFileName);
Yann Colletc7bd6a42018-10-01 14:04:00 -07002855 }
2856
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002857 result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName);
Yann Colletc7bd6a42018-10-01 14:04:00 -07002858
2859 if (releaseDstFile) {
Yann Colletc7bd6a42018-10-01 14:04:00 -07002860 clearHandler();
W. Felix Handtef746c372023-02-06 08:05:47 -08002861
2862 if (transferStat) {
2863 UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
2864 }
2865
Yonatan Komornik70df5de2022-01-24 14:43:02 -08002866 if (AIO_WritePool_closeFile(ress.writeCtx)) {
Yann Colletc7bd6a42018-10-01 14:04:00 -07002867 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
2868 result = 1;
2869 }
2870
W. Felix Handte1e3eba62023-01-17 15:08:15 -08002871 if (transferStat) {
W. Felix Handtef746c372023-02-06 08:05:47 -08002872 UTIL_utime(dstFileName, srcFileStat);
W. Felix Handte9cd6c1f2021-08-04 14:49:56 -04002873 }
2874
Yann Colletc7bd6a42018-10-01 14:04:00 -07002875 if ( (result != 0) /* operation failure */
Yann Colletc7bd6a42018-10-01 14:04:00 -07002876 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
2877 ) {
W. Felix Handteb02cdf62020-08-10 15:39:14 -04002878 FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
Yann Colletc7bd6a42018-10-01 14:04:00 -07002879 }
Yann Colletc7bd6a42018-10-01 14:04:00 -07002880 }
2881
2882 return result;
2883}
2884
Yann Colletc34185d2017-07-03 10:27:16 -07002885
Yann Collet1f1f2392016-02-12 18:33:26 +01002886/** FIO_decompressSrcFile() :
Yann Colletc7bd6a42018-10-01 14:04:00 -07002887 Open `srcFileName`, transfer control to decompressDstFile()
Yann Collet1f1f2392016-02-12 18:33:26 +01002888 @return : 0 : OK
Yann Colletc7bd6a42018-10-01 14:04:00 -07002889 1 : error
Yann Collet1f1f2392016-02-12 18:33:26 +01002890*/
senhuang4251234962020-09-07 13:13:05 -04002891static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002892{
Przemyslaw Skibinskib0f2ef22016-12-02 13:50:29 +01002893 FILE* srcFile;
W. Felix Handte2ad68552023-01-17 14:01:06 -08002894 stat_t srcFileStat;
Yann Colletc34185d2017-07-03 10:27:16 -07002895 int result;
Yonatan Komornik79bdb8c2023-02-02 15:19:22 -08002896 U64 fileSize = UTIL_FILESIZE_UNKNOWN;
Przemyslaw Skibinski0e146752016-11-30 13:34:21 +01002897
Yann Colletb09b12c2016-06-09 22:59:51 +02002898 if (UTIL_isDirectory(srcFileName)) {
2899 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2900 return 1;
2901 }
Przemyslaw Skibinski0e146752016-11-30 13:34:21 +01002902
W. Felix Handte2ad68552023-01-17 14:01:06 -08002903 srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
Yann Collet0b9b8942017-02-27 00:27:30 -08002904 if (srcFile==NULL) return 1;
Yonatan Komornik79bdb8c2023-02-02 15:19:22 -08002905
2906 /* Don't use AsyncIO for small files */
2907 if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
2908 fileSize = UTIL_getFileSizeStat(&srcFileStat);
2909 if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
2910 AIO_ReadPool_setAsync(ress.readCtx, 0);
2911 AIO_WritePool_setAsync(ress.writeCtx, 0);
2912 } else {
2913 AIO_ReadPool_setAsync(ress.readCtx, 1);
2914 AIO_WritePool_setAsync(ress.writeCtx, 1);
2915 }
2916
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002917 AIO_ReadPool_setFile(ress.readCtx, srcFile);
Yann Collet88fcd292015-11-25 14:42:45 +01002918
W. Felix Handte5653f962023-01-17 14:05:15 -08002919 result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat);
Yonatan Komornikcc0657f2022-01-31 15:43:41 -08002920
2921 AIO_ReadPool_setFile(ress.readCtx, NULL);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002922
Przemyslaw Skibinskib0f2ef22016-12-02 13:50:29 +01002923 /* Close file */
Yann Colletc34185d2017-07-03 10:27:16 -07002924 if (fclose(srcFile)) {
Yann Collet92889702017-09-18 13:41:54 -07002925 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
Yann Colletc34185d2017-07-03 10:27:16 -07002926 return 1;
2927 }
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002928 if ( prefs->removeSrcFile /* --rm */
Yann Colletc7bd6a42018-10-01 14:04:00 -07002929 && (result==0) /* decompression successful */
Yann Collet6c351122017-07-03 13:24:50 -07002930 && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
Nick Terrella6052af2017-11-17 16:38:56 -08002931 /* We must clear the handler, since after this point calling it would
2932 * delete both the source and destination files.
2933 */
2934 clearHandler();
W. Felix Handteb02cdf62020-08-10 15:39:14 -04002935 if (FIO_removeFile(srcFileName)) {
Yann Collet6c351122017-07-03 13:24:50 -07002936 /* failed to remove src file */
2937 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
2938 return 1;
2939 } }
Yann Colletc34185d2017-07-03 10:27:16 -07002940 return result;
Yann Collet1f1f2392016-02-12 18:33:26 +01002941}
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002942
Yann Collet1f1f2392016-02-12 18:33:26 +01002943
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002944
senhuang4251234962020-09-07 13:13:05 -04002945int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002946 const char* dstFileName, const char* srcFileName,
Yann Colletdeb078b2015-12-17 20:30:14 +01002947 const char* dictFileName)
2948{
Karl Ostmo5e220bf2019-01-22 17:31:13 -08002949 dRess_t const ress = FIO_createDResources(prefs, dictFileName);
Yann Colletdeb078b2015-12-17 20:30:14 +01002950
senhuang4251234962020-09-07 13:13:05 -04002951 int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
Yann Colletdeb078b2015-12-17 20:30:14 +01002952
Danielle Rozenblit70850eb2023-03-08 16:54:57 -08002953
2954
Yann Colletdeb078b2015-12-17 20:30:14 +01002955 FIO_freeDResources(ress);
Yann Collet6c351122017-07-03 13:24:50 -07002956 return decodingError;
Yann Colletdeb078b2015-12-17 20:30:14 +01002957}
2958
W. Felix Handte91c3f542019-10-24 20:18:57 -04002959static const char *suffixList[] = {
2960 ZSTD_EXTENSION,
2961 TZSTD_EXTENSION,
senhuang426b6cc802020-09-18 12:49:51 -04002962#ifndef ZSTD_NODECOMPRESS
2963 ZSTD_ALT_EXTENSION,
2964#endif
W. Felix Handte91c3f542019-10-24 20:18:57 -04002965#ifdef ZSTD_GZDECOMPRESS
2966 GZ_EXTENSION,
2967 TGZ_EXTENSION,
2968#endif
2969#ifdef ZSTD_LZMADECOMPRESS
2970 LZMA_EXTENSION,
2971 XZ_EXTENSION,
2972 TXZ_EXTENSION,
2973#endif
2974#ifdef ZSTD_LZ4DECOMPRESS
2975 LZ4_EXTENSION,
2976 TLZ4_EXTENSION,
2977#endif
2978 NULL
2979};
2980
2981static const char *suffixListStr =
2982 ZSTD_EXTENSION "/" TZSTD_EXTENSION
2983#ifdef ZSTD_GZDECOMPRESS
2984 "/" GZ_EXTENSION "/" TGZ_EXTENSION
2985#endif
2986#ifdef ZSTD_LZMADECOMPRESS
2987 "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
2988#endif
2989#ifdef ZSTD_LZ4DECOMPRESS
2990 "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
2991#endif
2992;
Yann Colletdeb078b2015-12-17 20:30:14 +01002993
Yann Collet1ab71a82018-09-28 18:19:23 -07002994/* FIO_determineDstName() :
2995 * create a destination filename from a srcFileName.
2996 * @return a pointer to it.
2997 * @return == NULL if there is an error */
2998static const char*
Sen Huang64bc4412019-10-03 13:53:04 -04002999FIO_determineDstName(const char* srcFileName, const char* outDirName)
Yann Collet1ab71a82018-09-28 18:19:23 -07003000{
3001 static size_t dfnbCapacity = 0;
3002 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
W. Felix Handte91c3f542019-10-24 20:18:57 -04003003 size_t dstFileNameEndPos;
Sen Huang6b81bfb2019-10-03 15:23:49 -04003004 char* outDirFilename = NULL;
W. Felix Handte91c3f542019-10-24 20:18:57 -04003005 const char* dstSuffix = "";
3006 size_t dstSuffixLen = 0;
Sergey Ponomarevb804dd32019-09-14 21:14:43 +03003007
Sen Huang64bc4412019-10-03 13:53:04 -04003008 size_t sfnSize = strlen(srcFileName);
Yann Collet17951332019-10-17 15:32:03 -07003009
W. Felix Handte91c3f542019-10-24 20:18:57 -04003010 size_t srcSuffixLen;
3011 const char* const srcSuffix = strrchr(srcFileName, '.');
Yonatan Komornikae467042022-07-29 16:13:07 -07003012
3013 if(!strcmp(srcFileName, stdinmark)) {
3014 return stdoutmark;
3015 }
3016
W. Felix Handte91c3f542019-10-24 20:18:57 -04003017 if (srcSuffix == NULL) {
3018 DISPLAYLEVEL(1,
3019 "zstd: %s: unknown suffix (%s expected). "
3020 "Can't derive the output file name. "
3021 "Specify it with -o dstFileName. Ignoring.\n",
3022 srcFileName, suffixListStr);
Yann Collet1ab71a82018-09-28 18:19:23 -07003023 return NULL;
3024 }
W. Felix Handte91c3f542019-10-24 20:18:57 -04003025 srcSuffixLen = strlen(srcSuffix);
Yann Collet1ab71a82018-09-28 18:19:23 -07003026
W. Felix Handte91c3f542019-10-24 20:18:57 -04003027 {
3028 const char** matchedSuffixPtr;
3029 for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
3030 if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
3031 break;
3032 }
3033 }
3034
3035 /* check suffix is authorized */
3036 if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
3037 DISPLAYLEVEL(1,
3038 "zstd: %s: unknown suffix (%s expected). "
3039 "Can't derive the output file name. "
3040 "Specify it with -o dstFileName. Ignoring.\n",
3041 srcFileName, suffixListStr);
3042 return NULL;
3043 }
3044
3045 if ((*matchedSuffixPtr)[1] == 't') {
3046 dstSuffix = ".tar";
3047 dstSuffixLen = strlen(dstSuffix);
3048 }
Yann Collet1ab71a82018-09-28 18:19:23 -07003049 }
W. Felix Handte91c3f542019-10-24 20:18:57 -04003050
Sen Huang64bc4412019-10-03 13:53:04 -04003051 if (outDirName) {
Sen Huang6b81bfb2019-10-03 15:23:49 -04003052 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
3053 sfnSize = strlen(outDirFilename);
3054 assert(outDirFilename != NULL);
Sen Huang64bc4412019-10-03 13:53:04 -04003055 }
Yann Collet1ab71a82018-09-28 18:19:23 -07003056
W. Felix Handte91c3f542019-10-24 20:18:57 -04003057 if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
Sen Huang64bc4412019-10-03 13:53:04 -04003058 /* allocate enough space to write dstFilename into it */
Yann Collet1ab71a82018-09-28 18:19:23 -07003059 free(dstFileNameBuffer);
3060 dfnbCapacity = sfnSize + 20;
3061 dstFileNameBuffer = (char*)malloc(dfnbCapacity);
3062 if (dstFileNameBuffer==NULL)
W. Felix Handte91c3f542019-10-24 20:18:57 -04003063 EXM_THROW(74, "%s : not enough memory for dstFileName",
3064 strerror(errno));
Yann Collet1ab71a82018-09-28 18:19:23 -07003065 }
3066
3067 /* return dst name == src name truncated from suffix */
Yann Collet3ca62612018-10-02 15:59:11 -07003068 assert(dstFileNameBuffer != NULL);
W. Felix Handte91c3f542019-10-24 20:18:57 -04003069 dstFileNameEndPos = sfnSize - srcSuffixLen;
Sen Huang6b81bfb2019-10-03 15:23:49 -04003070 if (outDirFilename) {
Felix Handte506e1a12019-10-24 17:49:34 -04003071 memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
Sen Huang6b81bfb2019-10-03 15:23:49 -04003072 free(outDirFilename);
Sen Huang64bc4412019-10-03 13:53:04 -04003073 } else {
Felix Handte506e1a12019-10-24 17:49:34 -04003074 memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
Sen Huang64bc4412019-10-03 13:53:04 -04003075 }
W. Felix Handte91c3f542019-10-24 20:18:57 -04003076
3077 /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
3078 * extension on decompression. Also writes terminating null. */
3079 strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
Yann Collet1ab71a82018-09-28 18:19:23 -07003080 return dstFileNameBuffer;
3081
3082 /* note : dstFileNameBuffer memory is not going to be free */
3083}
3084
Yann Colletc7bd6a42018-10-01 14:04:00 -07003085int
senhuang4251234962020-09-07 13:13:05 -04003086FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
3087 FIO_prefs_t* const prefs,
senhuang42b6abbc32020-08-26 11:35:07 -04003088 const char** srcNamesTable,
Xin Xie9a8ccd42020-06-19 19:35:51 -07003089 const char* outMirroredRootDirName,
Sen Huang64bc4412019-10-03 13:53:04 -04003090 const char* outDirName, const char* outFileName,
Yann Colletc7bd6a42018-10-01 14:04:00 -07003091 const char* dictFileName)
Yann Colletdeb078b2015-12-17 20:30:14 +01003092{
senhuang42202b2952020-09-03 09:28:40 -04003093 int status;
Yann Colletc7bd6a42018-10-01 14:04:00 -07003094 int error = 0;
Karl Ostmo5e220bf2019-01-22 17:31:13 -08003095 dRess_t ress = FIO_createDResources(prefs, dictFileName);
Yann Colletdeb078b2015-12-17 20:30:14 +01003096
Nick Terrell4680e852017-12-12 18:32:50 -08003097 if (outFileName) {
Yann Collet8c85b292023-01-23 18:55:51 -08003098 if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
senhuang427e867ad2020-08-26 18:52:32 -04003099 FIO_freeDResources(ress);
senhuang427991c552020-08-26 16:50:20 -04003100 return 1;
senhuang427e867ad2020-08-26 18:52:32 -04003101 }
Yann Colletcaf40d02019-10-17 16:58:49 -07003102 if (!prefs->testMode) {
Yonatan Komornik1598e6c2022-01-21 13:55:41 -08003103 FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
3104 if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
Yonatan Komornik70df5de2022-01-24 14:43:02 -08003105 AIO_WritePool_setFile(ress.writeCtx, dstFile);
Yann Colletcaf40d02019-10-17 16:58:49 -07003106 }
senhuang42a6414f12020-09-01 12:32:18 -04003107 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
senhuang4251234962020-09-07 13:13:05 -04003108 status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
senhuang427842f432020-09-03 09:22:07 -04003109 if (!status) fCtx->nbFilesProcessed++;
senhuang42a6414f12020-09-01 12:32:18 -04003110 error |= status;
senhuang42a3401ca2020-08-25 17:23:47 -04003111 }
Yonatan Komornik70df5de2022-01-24 14:43:02 -08003112 if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx)))
Yann Collet0f2d4432018-12-19 17:25:58 -08003113 EXM_THROW(72, "Write error : %s : cannot properly close output file",
3114 strerror(errno));
Yann Colletaccfd802016-02-15 19:33:16 +01003115 } else {
Xin Xie9a8ccd42020-06-19 19:35:51 -07003116 if (outMirroredRootDirName)
Yann Collet0d793a62021-01-06 01:35:52 -08003117 UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
Xin Xie9a8ccd42020-06-19 19:35:51 -07003118
senhuang42a6414f12020-09-01 12:32:18 -04003119 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */
3120 const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
Xin Xie9a8ccd42020-06-19 19:35:51 -07003121 const char* dstFileName = NULL;
3122 if (outMirroredRootDirName) {
3123 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
3124 if (validMirroredDirName) {
3125 dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
3126 free(validMirroredDirName);
3127 } else {
3128 DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
3129 }
3130 } else {
3131 dstFileName = FIO_determineDstName(srcFileName, outDirName);
3132 }
Yann Colletc7bd6a42018-10-01 14:04:00 -07003133 if (dstFileName == NULL) { error=1; continue; }
senhuang4251234962020-09-07 13:13:05 -04003134 status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
senhuang427842f432020-09-03 09:22:07 -04003135 if (!status) fCtx->nbFilesProcessed++;
senhuang42a6414f12020-09-01 12:32:18 -04003136 error |= status;
Yann Collet8b23eea2016-05-10 05:37:43 +02003137 }
Sen Huangc5ebb372019-10-09 09:39:52 -04003138 if (outDirName)
Yann Collet0d793a62021-01-06 01:35:52 -08003139 FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
Yann Collet8b23eea2016-05-10 05:37:43 +02003140 }
Yann Collet0d793a62021-01-06 01:35:52 -08003141
Nick Terrellfbff7822022-01-07 15:07:28 -08003142 if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
3143 DISPLAY_PROGRESS("\r%79s\r", "");
Yann Colletea684c32023-01-18 15:38:36 -08003144 DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n",
3145 fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput);
Nick Terrellfbff7822022-01-07 15:07:28 -08003146 }
Yann Colletdeb078b2015-12-17 20:30:14 +01003147
3148 FIO_freeDResources(ress);
Yann Colletc7bd6a42018-10-01 14:04:00 -07003149 return error;
Yann Colletdeb078b2015-12-17 20:30:14 +01003150}
Yann Colletaccfd802016-02-15 19:33:16 +01003151
Yann Collet166645e2017-08-18 18:30:41 -07003152/* **************************************************************************
3153 * .zst file info (--list command)
3154 ***************************************************************************/
3155
3156typedef struct {
Nick Terrell6dd958e2017-10-04 12:23:23 -07003157 U64 decompressedSize;
3158 U64 compressedSize;
3159 U64 windowSize;
Yann Collet166645e2017-08-18 18:30:41 -07003160 int numActualFrames;
3161 int numSkippableFrames;
Yann Collet166645e2017-08-18 18:30:41 -07003162 int decompUnavailable;
Yann Collet166645e2017-08-18 18:30:41 -07003163 int usesCheck;
FRexdc394092022-10-20 21:46:50 +02003164 BYTE checksum[4];
Yann Collet56f1f0e2017-09-26 11:21:36 -07003165 U32 nbFiles;
htnhancc8c9842022-07-05 21:28:33 -05003166 unsigned dictID;
Yann Collet166645e2017-08-18 18:30:41 -07003167} fileInfo_t;
3168
/* InfoError :
 * outcome of the file analysis performed for the --list command */
typedef enum {
    info_success=0,          /* all frames were parsed up to the end of the file */
    info_frame_error=1,      /* a frame or block could not be parsed, read or skipped */
    info_not_zstd=2,         /* unknown magic number, or end of file within an incomplete frame */
    info_file_error=3,       /* source is not a regular file, or could not be opened */
    info_truncated_input=4   /* final position does not match the file size */
} InfoError;
Yann Collet166645e2017-08-18 18:30:41 -07003176
/* ERROR_IF() :
 * when `cond` is true, display the printf-style message at level 1,
 * followed by " \n", then make the *calling* function return `retVal`.
 * Note : expands to a plain { } block, not a do { } while (0) statement. */
#define ERROR_IF(cond,retVal,...) {         \
    if (cond) {                             \
        DISPLAYLEVEL(1, __VA_ARGS__);       \
        DISPLAYLEVEL(1, " \n");             \
        return retVal;                      \
    }                                       \
}
3184
3185static InfoError
3186FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
3187{
Yann Collet166645e2017-08-18 18:30:41 -07003188 /* begin analyzing frame */
3189 for ( ; ; ) {
3190 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
3191 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
Nick Terrellb1ec94e2019-10-21 19:42:14 -07003192 if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
Yann Collet56f1f0e2017-09-26 11:21:36 -07003193 if ( feof(srcFile)
3194 && (numBytesRead == 0)
Yann Collet18b79532017-10-17 16:14:25 -07003195 && (info->compressedSize > 0)
3196 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
Karl Ostmo4fa585a2019-01-14 17:58:46 -08003197 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
3198 unsigned long long file_size = (unsigned long long) info->compressedSize;
3199 ERROR_IF(file_position != file_size, info_truncated_input,
3200 "Error: seeked to position %llu, which is beyond file size of %llu\n",
3201 file_position,
3202 file_size);
Yann Colletd987ab52018-09-28 09:34:16 -07003203 break; /* correct end of file => success */
Yann Collet166645e2017-08-18 18:30:41 -07003204 }
Yann Colletec1cb8e2018-09-28 16:04:00 -07003205 ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
3206 ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
Yann Collet166645e2017-08-18 18:30:41 -07003207 }
3208 { U32 const magicNumber = MEM_readLE32(headerBuffer);
3209 /* Zstandard frame */
3210 if (magicNumber == ZSTD_MAGICNUMBER) {
Nick Terrell6dd958e2017-10-04 12:23:23 -07003211 ZSTD_frameHeader header;
Yann Collet166645e2017-08-18 18:30:41 -07003212 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
Yann Collet9b45db72018-09-27 16:49:08 -07003213 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
3214 || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
Yann Collet166645e2017-08-18 18:30:41 -07003215 info->decompUnavailable = 1;
3216 } else {
3217 info->decompressedSize += frameContentSize;
3218 }
Yann Colletec1cb8e2018-09-28 16:04:00 -07003219 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
Yann Collet9b45db72018-09-27 16:49:08 -07003220 info_frame_error, "Error: could not decode frame header");
htnhand7eb8292022-07-08 12:20:50 -05003221 if (info->dictID != 0 && info->dictID != header.dictID) {
3222 DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. Showing dictID 0 instead");
3223 info->dictID = 0;
3224 } else {
3225 info->dictID = header.dictID;
3226 }
Nick Terrell6dd958e2017-10-04 12:23:23 -07003227 info->windowSize = header.windowSize;
Yann Collet166645e2017-08-18 18:30:41 -07003228 /* move to the end of the frame header */
3229 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
Yann Colletec1cb8e2018-09-28 16:04:00 -07003230 ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
3231 ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
Yann Collet9b45db72018-09-27 16:49:08 -07003232 info_frame_error, "Error: could not move to end of frame header");
3233 }
Yann Collet166645e2017-08-18 18:30:41 -07003234
Yann Collet9b45db72018-09-27 16:49:08 -07003235 /* skip all blocks in the frame */
Yann Collet166645e2017-08-18 18:30:41 -07003236 { int lastBlock = 0;
3237 do {
3238 BYTE blockHeaderBuffer[3];
Yann Colletec1cb8e2018-09-28 16:04:00 -07003239 ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
Yann Collet9b45db72018-09-27 16:49:08 -07003240 info_frame_error, "Error while reading block header");
Yann Collet166645e2017-08-18 18:30:41 -07003241 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
3242 U32 const blockTypeID = (blockHeader >> 1) & 3;
3243 U32 const isRLE = (blockTypeID == 1);
3244 U32 const isWrongBlock = (blockTypeID == 3);
3245 long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
Yann Colletec1cb8e2018-09-28 16:04:00 -07003246 ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
Yann Collet166645e2017-08-18 18:30:41 -07003247 lastBlock = blockHeader & 1;
Yann Colletec1cb8e2018-09-28 16:04:00 -07003248 ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
Yann Collet9b45db72018-09-27 16:49:08 -07003249 info_frame_error, "Error: could not skip to end of block");
3250 }
Yann Collet166645e2017-08-18 18:30:41 -07003251 } while (lastBlock != 1);
Yann Collet166645e2017-08-18 18:30:41 -07003252 }
3253
3254 /* check if checksum is used */
3255 { BYTE const frameHeaderDescriptor = headerBuffer[4];
3256 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
3257 if (contentChecksumFlag) {
Yann Collet166645e2017-08-18 18:30:41 -07003258 info->usesCheck = 1;
FRexdc394092022-10-20 21:46:50 +02003259 ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4,
3260 info_frame_error, "Error: could not read checksum");
Yann Collet9b45db72018-09-27 16:49:08 -07003261 } }
Yann Collet166645e2017-08-18 18:30:41 -07003262 info->numActualFrames++;
3263 }
3264 /* Skippable frame */
Yann Collet2c8fde52018-11-13 17:36:35 -08003265 else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
Yann Collet166645e2017-08-18 18:30:41 -07003266 U32 const frameSize = MEM_readLE32(headerBuffer + 4);
3267 long const seek = (long)(8 + frameSize - numBytesRead);
Yann Colletec1cb8e2018-09-28 16:04:00 -07003268 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
Yann Collet9b45db72018-09-27 16:49:08 -07003269 info_frame_error, "Error: could not find end of skippable frame");
Yann Collet166645e2017-08-18 18:30:41 -07003270 info->numSkippableFrames++;
3271 }
3272 /* unknown content */
3273 else {
Yann Collet9b45db72018-09-27 16:49:08 -07003274 return info_not_zstd;
Yann Collet166645e2017-08-18 18:30:41 -07003275 }
Yann Collet9b45db72018-09-27 16:49:08 -07003276 } /* magic number analysis */
3277 } /* end analyzing frames */
3278 return info_success;
Yann Collet166645e2017-08-18 18:30:41 -07003279}
3280
Yann Collet9b45db72018-09-27 16:49:08 -07003281
3282static InfoError
3283getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
Yann Collet18b79532017-10-17 16:14:25 -07003284{
Yann Collet9b45db72018-09-27 16:49:08 -07003285 InfoError status;
W. Felix Handte2ad68552023-01-17 14:01:06 -08003286 stat_t srcFileStat;
3287 FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat);
Yann Colletec1cb8e2018-09-28 16:04:00 -07003288 ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
Yann Collet9b45db72018-09-27 16:49:08 -07003289
W. Felix Handte2ad68552023-01-17 14:01:06 -08003290 info->compressedSize = UTIL_getFileSizeStat(&srcFileStat);
Yann Collet9b45db72018-09-27 16:49:08 -07003291 status = FIO_analyzeFrames(info, srcFile);
3292
3293 fclose(srcFile);
3294 info->nbFiles = 1;
3295 return status;
3296}
3297
3298
3299/** getFileInfo() :
3300 * Reads information from file, stores in *info
3301 * @return : InfoError status
3302 */
3303static InfoError
3304getFileInfo(fileInfo_t* info, const char* srcFileName)
3305{
Yann Colletec1cb8e2018-09-28 16:04:00 -07003306 ERROR_IF(!UTIL_isRegularFile(srcFileName),
Yann Collet9b45db72018-09-27 16:49:08 -07003307 info_file_error, "Error : %s is not a file", srcFileName);
Yann Collet18b79532017-10-17 16:14:25 -07003308 return getFileInfo_fileConfirmed(info, srcFileName);
3309}
3310
3311
Yann Collet9b45db72018-09-27 16:49:08 -07003312static void
3313displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
3314{
W. Felix Handte87e94e32021-06-10 12:31:42 -04003315 UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize);
3316 UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize);
3317 UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize);
Yann Collet0d793a62021-01-06 01:35:52 -08003318 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
Yann Collet166645e2017-08-18 18:30:41 -07003319 const char* const checkString = (info->usesCheck ? "XXH64" : "None");
3320 if (displayLevel <= 2) {
3321 if (!info->decompUnavailable) {
W. Felix Handte87e94e32021-06-10 12:31:42 -04003322 DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n",
Yann Collet56f1f0e2017-09-26 11:21:36 -07003323 info->numSkippableFrames + info->numActualFrames,
3324 info->numSkippableFrames,
W. Felix Handte87e94e32021-06-10 12:31:42 -04003325 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3326 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
Yann Collet166645e2017-08-18 18:30:41 -07003327 ratio, checkString, inFileName);
3328 } else {
W. Felix Handte87e94e32021-06-10 12:31:42 -04003329 DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n",
Yann Collet56f1f0e2017-09-26 11:21:36 -07003330 info->numSkippableFrames + info->numActualFrames,
3331 info->numSkippableFrames,
W. Felix Handte87e94e32021-06-10 12:31:42 -04003332 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
Yann Collet56f1f0e2017-09-26 11:21:36 -07003333 checkString, inFileName);
Yann Collet166645e2017-08-18 18:30:41 -07003334 }
3335 } else {
Yann Collet9ef32b32017-10-14 00:02:32 -07003336 DISPLAYOUT("%s \n", inFileName);
Yann Collet166645e2017-08-18 18:30:41 -07003337 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
Yann Collet9ef32b32017-10-14 00:02:32 -07003338 if (info->numSkippableFrames)
3339 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
htnhancc8c9842022-07-05 21:28:33 -05003340 DISPLAYOUT("DictID: %u\n", info->dictID);
W. Felix Handte87e94e32021-06-10 12:31:42 -04003341 DISPLAYOUT("Window Size: %.*f%s (%llu B)\n",
3342 window_hrs.precision, window_hrs.value, window_hrs.suffix,
Nick Terrell6dd958e2017-10-04 12:23:23 -07003343 (unsigned long long)info->windowSize);
W. Felix Handte87e94e32021-06-10 12:31:42 -04003344 DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n",
3345 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
Yann Collet3095ca82017-09-26 13:53:50 -07003346 (unsigned long long)info->compressedSize);
Yann Collet166645e2017-08-18 18:30:41 -07003347 if (!info->decompUnavailable) {
W. Felix Handte87e94e32021-06-10 12:31:42 -04003348 DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n",
3349 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
Yann Collet3095ca82017-09-26 13:53:50 -07003350 (unsigned long long)info->decompressedSize);
Yann Collet166645e2017-08-18 18:30:41 -07003351 DISPLAYOUT("Ratio: %.4f\n", ratio);
3352 }
FRexdc394092022-10-20 21:46:50 +02003353
3354 if (info->usesCheck && info->numActualFrames == 1) {
3355 DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString,
3356 info->checksum[3], info->checksum[2],
3357 info->checksum[1], info->checksum[0]
3358 );
3359 } else {
3360 DISPLAYOUT("Check: %s\n", checkString);
3361 }
3362
Yann Collet166645e2017-08-18 18:30:41 -07003363 DISPLAYOUT("\n");
3364 }
3365}
3366
Yann Collet56f1f0e2017-09-26 11:21:36 -07003367static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
3368{
3369 fileInfo_t total;
Conrad Meyer60637422018-02-28 14:16:30 -08003370 memset(&total, 0, sizeof(total));
Yann Collet56f1f0e2017-09-26 11:21:36 -07003371 total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
3372 total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
3373 total.compressedSize = fi1.compressedSize + fi2.compressedSize;
3374 total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
3375 total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
3376 total.usesCheck = fi1.usesCheck & fi2.usesCheck;
3377 total.nbFiles = fi1.nbFiles + fi2.nbFiles;
3378 return total;
3379}
Yann Collet166645e2017-08-18 18:30:41 -07003380
Yann Colletec1cb8e2018-09-28 16:04:00 -07003381static int
3382FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
3383{
Yann Collet166645e2017-08-18 18:30:41 -07003384 fileInfo_t info;
3385 memset(&info, 0, sizeof(info));
Yann Collet9b45db72018-09-27 16:49:08 -07003386 { InfoError const error = getFileInfo(&info, inFileName);
Karl Ostmo4fa585a2019-01-14 17:58:46 -08003387 switch (error) {
3388 case info_frame_error:
3389 /* display error, but provide output */
3390 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
3391 break;
3392 case info_not_zstd:
3393 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
3394 if (displayLevel > 2) DISPLAYOUT("\n");
3395 return 1;
3396 case info_file_error:
3397 /* error occurred while opening the file */
3398 if (displayLevel > 2) DISPLAYOUT("\n");
3399 return 1;
3400 case info_truncated_input:
3401 DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
3402 if (displayLevel > 2) DISPLAYOUT("\n");
3403 return 1;
3404 case info_success:
3405 default:
3406 break;
Yann Collet166645e2017-08-18 18:30:41 -07003407 }
Karl Ostmo4fa585a2019-01-14 17:58:46 -08003408
Yann Collet166645e2017-08-18 18:30:41 -07003409 displayInfo(inFileName, &info, displayLevel);
Yann Collet56f1f0e2017-09-26 11:21:36 -07003410 *total = FIO_addFInfo(*total, info);
Yann Collet0ed8ee42018-12-20 14:46:23 -08003411 assert(error == info_success || error == info_frame_error);
Yann Collet458a1a12020-04-13 10:13:29 -07003412 return (int)error;
Yann Collet166645e2017-08-18 18:30:41 -07003413 }
3414}
3415
Yann Collet9b45db72018-09-27 16:49:08 -07003416int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
3417{
3418 /* ensure no specified input is stdin (needs fseek() capability) */
3419 { unsigned u;
3420 for (u=0; u<numFiles;u++) {
Yann Colletec1cb8e2018-09-28 16:04:00 -07003421 ERROR_IF(!strcmp (filenameTable[u], stdinmark),
Yann Collet9b45db72018-09-27 16:49:08 -07003422 1, "zstd: --list does not support reading from standard input");
3423 } }
Topher Lubaway4c166082018-06-11 10:13:00 -07003424
Yann Collet166645e2017-08-18 18:30:41 -07003425 if (numFiles == 0) {
Nick Terrelle58a39f2022-01-14 12:37:32 -08003426 if (!UTIL_isConsole(stdin)) {
Yann Collet9b45db72018-09-27 16:49:08 -07003427 DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
W. Felix Handte712a9fd2018-06-29 15:33:44 -04003428 }
Yann Collet9b45db72018-09-27 16:49:08 -07003429 DISPLAYLEVEL(1, "No files given \n");
W. Felix Handte712a9fd2018-06-29 15:33:44 -04003430 return 1;
Yann Collet166645e2017-08-18 18:30:41 -07003431 }
Yann Collet9b45db72018-09-27 16:49:08 -07003432
Nick Terrell6dd958e2017-10-04 12:23:23 -07003433 if (displayLevel <= 2) {
3434 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
3435 }
Yann Collet56f1f0e2017-09-26 11:21:36 -07003436 { int error = 0;
Yann Collet56f1f0e2017-09-26 11:21:36 -07003437 fileInfo_t total;
3438 memset(&total, 0, sizeof(total));
3439 total.usesCheck = 1;
Yann Collet9b45db72018-09-27 16:49:08 -07003440 /* --list each file, and check for any error */
3441 { unsigned u;
3442 for (u=0; u<numFiles;u++) {
3443 error |= FIO_listFile(&total, filenameTable[u], displayLevel);
3444 } }
Yann Collet9ef32b32017-10-14 00:02:32 -07003445 if (numFiles > 1 && displayLevel <= 2) { /* display total */
W. Felix Handte87e94e32021-06-10 12:31:42 -04003446 UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize);
3447 UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize);
Yann Collet0d793a62021-01-06 01:35:52 -08003448 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
Yann Collet56f1f0e2017-09-26 11:21:36 -07003449 const char* const checkString = (total.usesCheck ? "XXH64" : "");
3450 DISPLAYOUT("----------------------------------------------------------------- \n");
3451 if (total.decompUnavailable) {
W. Felix Handte87e94e32021-06-10 12:31:42 -04003452 DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n",
Yann Collet56f1f0e2017-09-26 11:21:36 -07003453 total.numSkippableFrames + total.numActualFrames,
3454 total.numSkippableFrames,
W. Felix Handte87e94e32021-06-10 12:31:42 -04003455 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
Yann Colletededcfc2018-12-21 16:19:44 -08003456 checkString, (unsigned)total.nbFiles);
Yann Collet56f1f0e2017-09-26 11:21:36 -07003457 } else {
W. Felix Handte87e94e32021-06-10 12:31:42 -04003458 DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n",
Yann Collet56f1f0e2017-09-26 11:21:36 -07003459 total.numSkippableFrames + total.numActualFrames,
3460 total.numSkippableFrames,
W. Felix Handte87e94e32021-06-10 12:31:42 -04003461 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3462 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
Yann Colletededcfc2018-12-21 16:19:44 -08003463 ratio, checkString, (unsigned)total.nbFiles);
Yann Collet9ef32b32017-10-14 00:02:32 -07003464 } }
Yann Collet166645e2017-08-18 18:30:41 -07003465 return error;
3466 }
3467}
3468
3469
Yann Collet8b23eea2016-05-10 05:37:43 +02003470#endif /* #ifndef ZSTD_NODECOMPRESS */