blob: a77824edb051bee02db8cb3f6338773f4083c25d [file] [log] [blame]
Yann Colletb0cb0812017-08-31 12:20:50 -07001/*
Elliott Hughes44aba642023-09-12 20:18:59 +00002 * Copyright (c) Meta Platforms, Inc. and affiliates.
Nick Terrellc9325202016-09-01 15:22:19 -07003 * All rights reserved.
4 *
Yann Colletb0cb0812017-08-31 12:20:50 -07005 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
Nick Terrellc9325202016-09-01 15:22:19 -07008 */
9#include "Options.h"
Nick Terrell58f499c2017-01-26 20:47:59 -080010#include "util.h"
Nick Terrell254c5b12016-09-21 14:29:47 -070011#include "utils/ScopeGuard.h"
Nick Terrellc9325202016-09-01 15:22:19 -070012
Nick Terrell254c5b12016-09-21 14:29:47 -070013#include <algorithm>
14#include <cassert>
Nick Terrellc9325202016-09-01 15:22:19 -070015#include <cstdio>
Nick Terrell7df55e12016-09-01 18:26:19 -070016#include <cstring>
Nick Terrelldac03762016-09-23 14:38:25 -070017#include <iterator>
Nick Terrell4c202812016-09-06 12:40:59 -070018#include <thread>
Nick Terrell254c5b12016-09-21 14:29:47 -070019#include <vector>
20
Nick Terrellc9325202016-09-01 15:22:19 -070021
22namespace pzstd {
23
24namespace {
/**
 * Returns the number of worker threads to use when the user does not pass -p.
 *
 * If PZSTD_NUM_THREADS is defined at build time its value is used verbatim;
 * otherwise the value reported by std::thread::hardware_concurrency() is
 * returned (which the standard allows to be 0 when it cannot be determined).
 */
unsigned defaultNumThreads() {
#if defined(PZSTD_NUM_THREADS)
  const unsigned threadCount = PZSTD_NUM_THREADS;
#else
  const unsigned threadCount = std::thread::hardware_concurrency();
#endif
  return threadCount;
}
32
/**
 * Parses a run of ASCII decimal digits starting at *arg.
 *
 * On return *arg points at the first character that is not a digit (all
 * digits are consumed even when the value does not fit).
 *
 * @param arg  In/out cursor into a NUL-terminated string.
 * @return The parsed value, 0 if *arg does not start with a digit, or the
 *         largest unsigned value if the number is too large to represent.
 *         Saturating (instead of silently wrapping) keeps an absurdly large
 *         input from turning into a small, valid-looking number.
 */
unsigned parseUnsigned(const char **arg) {
  constexpr unsigned kMaxValue = static_cast<unsigned>(-1);
  unsigned result = 0;
  while (**arg >= '0' && **arg <= '9') {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit would exceed kMaxValue: clamp. Once clamped the
    // condition stays true for every further digit, so the value sticks.
    if (result > (kMaxValue - digit) / 10) {
      result = kMaxValue;
    } else {
      result = result * 10 + digit;
    }
    ++(*arg);
  }
  return result;
}
42
/**
 * Fetches the argument that belongs to the short option at options[0].
 *
 * The argument is either glued to the option ("-p4") or is the next element
 * of argv ("-p 4"). In the second form `i` is advanced to that element.
 *
 * @param options  Points at the option character currently being handled.
 * @param argv     The full argument vector.
 * @param i        Index of the argv element being parsed; incremented when the
 *                 argument is taken from the following element.
 * @param argc     Number of entries in argv.
 * @return Pointer to the argument text, or nullptr (after printing an error to
 *         stderr) when argv has no further element.
 */
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  const bool attached = options[1] != 0;
  if (attached) {
    return options + 1;
  }

  // Detached form: the value lives in the next argv slot.
  ++i;
  if (i != argc) {
    return argv[i];
  }

  std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
               *options);
  return nullptr;
}
56
// Suffix appended to (or stripped from) file names to derive the output name.
const std::string kZstdExtension = ".zst";

// "-" is the conventional spelling for standard input / standard output.
constexpr char kStdIn[] = "-";
constexpr char kStdOut[] = "-";

// Compression level used when none is given, and the highest level accepted
// unless --ultra is passed.
constexpr unsigned kDefaultCompressionLevel = 3;
constexpr unsigned kMaxNonUltraCompressionLevel = 19;

// Platform-specific path used as the output file in test mode.
#if defined(_WIN32)
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
68
/**
 * Reports on stderr that the requested operation is not supported.
 *
 * @param option  Text naming the unsupported option or feature.
 */
void notSupported(const char *option) {
  static constexpr char kMessageFormat[] = "Operation not supported: %s\n";
  std::fprintf(stderr, kMessageFormat, option);
}
Nick Terrellc9325202016-09-01 15:22:19 -070072
73void usage() {
74 std::fprintf(stderr, "Usage:\n");
Nick Terrell254c5b12016-09-21 14:29:47 -070075 std::fprintf(stderr, " pzstd [args] [FILE(s)]\n");
Nick Terrellc9325202016-09-01 15:22:19 -070076 std::fprintf(stderr, "Parallel ZSTD options:\n");
Chris Lamb2dbe4082018-05-04 08:39:03 -070077 std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n");
Nick Terrellc9325202016-09-01 15:22:19 -070078
79 std::fprintf(stderr, "ZSTD options:\n");
Nick Terrell254c5b12016-09-21 14:29:47 -070080 std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
81 std::fprintf(stderr, " -d, --decompress : decompression\n");
82 std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n");
Sean Purcell793e7bb2017-03-23 12:41:51 -070083 std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n");
Nick Terrell254c5b12016-09-21 14:29:47 -070084 std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n");
85 std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n");
86 std::fprintf(stderr, " -h, --help : display help and exit\n");
87 std::fprintf(stderr, " -V, --version : display version number and exit\n");
88 std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n");
89 std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n");
binhdvo7abebc82021-11-29 14:10:43 -050090 std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n");
Nick Terrell254c5b12016-09-21 14:29:47 -070091#ifdef UTIL_HAS_CREATEFILELIST
92 std::fprintf(stderr, " -r : operate recursively on directories\n");
93#endif
94 std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
95 std::fprintf(stderr, " -C, --check : integrity check (default)\n");
96 std::fprintf(stderr, " --no-check : no integrity check\n");
97 std::fprintf(stderr, " -t, --test : test compressed file integrity\n");
98 std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n");
Nick Terrellc9325202016-09-01 15:22:19 -070099}
100} // anonymous namespace
101
102Options::Options()
Nick Terrell254c5b12016-09-21 14:29:47 -0700103 : numThreads(defaultNumThreads()), maxWindowLog(23),
104 compressionLevel(kDefaultCompressionLevel), decompress(false),
105 overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
106 checksum(true), verbosity(2) {}
Nick Terrellc9325202016-09-01 15:22:19 -0700107
Nick Terrell254c5b12016-09-21 14:29:47 -0700108Options::Status Options::parse(int argc, const char **argv) {
109 bool test = false;
110 bool recursive = false;
Nick Terrellc9325202016-09-01 15:22:19 -0700111 bool ultra = false;
Nick Terrell254c5b12016-09-21 14:29:47 -0700112 bool forceStdout = false;
Sean Purcell793e7bb2017-03-23 12:41:51 -0700113 bool followLinks = false;
Nick Terrell254c5b12016-09-21 14:29:47 -0700114 // Local copy of input files, which are pointers into argv.
115 std::vector<const char *> localInputFiles;
Nick Terrellc9325202016-09-01 15:22:19 -0700116 for (int i = 1; i < argc; ++i) {
Nick Terrell254c5b12016-09-21 14:29:47 -0700117 const char *arg = argv[i];
118 // Protect against empty arguments
119 if (arg[0] == 0) {
Nick Terrellc9325202016-09-01 15:22:19 -0700120 continue;
Nick Terrellc9325202016-09-01 15:22:19 -0700121 }
Nick Terrell254c5b12016-09-21 14:29:47 -0700122 // Everything after "--" is an input file
123 if (!std::strcmp(arg, "--")) {
124 ++i;
125 std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
126 break;
127 }
128 // Long arguments that don't have a short option
129 {
130 bool isLongOption = true;
131 if (!std::strcmp(arg, "--rm")) {
132 keepSource = false;
133 } else if (!std::strcmp(arg, "--ultra")) {
Nick Terrellc9325202016-09-01 15:22:19 -0700134 ultra = true;
135 maxWindowLog = 0;
Nick Terrell254c5b12016-09-21 14:29:47 -0700136 } else if (!std::strcmp(arg, "--no-check")) {
137 checksum = false;
138 } else if (!std::strcmp(arg, "--sparse")) {
139 writeMode = WriteMode::Sparse;
140 notSupported("Sparse mode");
141 return Status::Failure;
142 } else if (!std::strcmp(arg, "--no-sparse")) {
143 writeMode = WriteMode::Regular;
144 notSupported("Sparse mode");
145 return Status::Failure;
146 } else if (!std::strcmp(arg, "--dictID")) {
147 notSupported(arg);
148 return Status::Failure;
149 } else if (!std::strcmp(arg, "--no-dictID")) {
150 notSupported(arg);
151 return Status::Failure;
152 } else {
153 isLongOption = false;
154 }
155 if (isLongOption) {
156 continue;
157 }
158 }
159 // Arguments with a short option simply set their short option.
160 const char *options = nullptr;
161 if (!std::strcmp(arg, "--processes")) {
162 options = "p";
163 } else if (!std::strcmp(arg, "--version")) {
164 options = "V";
165 } else if (!std::strcmp(arg, "--help")) {
166 options = "h";
167 } else if (!std::strcmp(arg, "--decompress")) {
168 options = "d";
169 } else if (!std::strcmp(arg, "--force")) {
170 options = "f";
171 } else if (!std::strcmp(arg, "--stdout")) {
172 options = "c";
173 } else if (!std::strcmp(arg, "--keep")) {
174 options = "k";
175 } else if (!std::strcmp(arg, "--verbose")) {
176 options = "v";
177 } else if (!std::strcmp(arg, "--quiet")) {
178 options = "q";
179 } else if (!std::strcmp(arg, "--check")) {
180 options = "C";
181 } else if (!std::strcmp(arg, "--test")) {
182 options = "t";
183 } else if (arg[0] == '-' && arg[1] != 0) {
184 options = arg + 1;
185 } else {
186 localInputFiles.emplace_back(arg);
187 continue;
188 }
189 assert(options != nullptr);
190
191 bool finished = false;
192 while (!finished && *options != 0) {
193 // Parse the compression level
194 if (*options >= '0' && *options <= '9') {
195 compressionLevel = parseUnsigned(&options);
196 continue;
197 }
198
199 switch (*options) {
Nick Terrellc9325202016-09-01 15:22:19 -0700200 case 'h':
Nick Terrell254c5b12016-09-21 14:29:47 -0700201 case 'H':
Nick Terrellc9325202016-09-01 15:22:19 -0700202 usage();
Nick Terrell254c5b12016-09-21 14:29:47 -0700203 return Status::Message;
204 case 'V':
205 std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
206 return Status::Message;
207 case 'p': {
208 finished = true;
209 const char *optionArgument = getArgument(options, argv, i, argc);
210 if (optionArgument == nullptr) {
211 return Status::Failure;
212 }
213 if (*optionArgument < '0' || *optionArgument > '9') {
214 std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
215 optionArgument);
216 return Status::Failure;
217 }
218 numThreads = parseUnsigned(&optionArgument);
219 if (*optionArgument != 0) {
220 std::fprintf(stderr,
221 "Option -p expects a number, but %u%s provided\n",
222 numThreads, optionArgument);
223 return Status::Failure;
224 }
225 break;
226 }
227 case 'o': {
228 finished = true;
229 const char *optionArgument = getArgument(options, argv, i, argc);
230 if (optionArgument == nullptr) {
231 return Status::Failure;
232 }
233 outputFile = optionArgument;
234 break;
235 }
236 case 'C':
237 checksum = true;
238 break;
239 case 'k':
240 keepSource = true;
241 break;
Nick Terrellc9325202016-09-01 15:22:19 -0700242 case 'd':
243 decompress = true;
244 break;
245 case 'f':
246 overwrite = true;
Nick Terrell254c5b12016-09-21 14:29:47 -0700247 forceStdout = true;
Sean Purcell793e7bb2017-03-23 12:41:51 -0700248 followLinks = true;
Nick Terrellc9325202016-09-01 15:22:19 -0700249 break;
Nick Terrell254c5b12016-09-21 14:29:47 -0700250 case 't':
251 test = true;
252 decompress = true;
Nick Terrellc9325202016-09-01 15:22:19 -0700253 break;
Nick Terrell254c5b12016-09-21 14:29:47 -0700254#ifdef UTIL_HAS_CREATEFILELIST
255 case 'r':
256 recursive = true;
257 break;
258#endif
Nick Terrellc9325202016-09-01 15:22:19 -0700259 case 'c':
Nick Terrell254c5b12016-09-21 14:29:47 -0700260 outputFile = kStdOut;
261 forceStdout = true;
Nick Terrellc9325202016-09-01 15:22:19 -0700262 break;
Nick Terrell254c5b12016-09-21 14:29:47 -0700263 case 'v':
264 ++verbosity;
265 break;
266 case 'q':
267 --verbosity;
268 // Ignore them for now
269 break;
270 // Unsupported options from Zstd
271 case 'D':
272 case 's':
273 notSupported("Zstd dictionaries.");
274 return Status::Failure;
275 case 'b':
276 case 'e':
277 case 'i':
278 case 'B':
279 notSupported("Zstd benchmarking options.");
280 return Status::Failure;
Nick Terrellc9325202016-09-01 15:22:19 -0700281 default:
Nick Terrellf1073c12016-09-21 16:04:44 -0700282 std::fprintf(stderr, "Invalid argument: %s\n", arg);
Nick Terrell254c5b12016-09-21 14:29:47 -0700283 return Status::Failure;
Nick Terrellac143482016-09-02 12:35:36 -0700284 }
Nick Terrell254c5b12016-09-21 14:29:47 -0700285 if (!finished) {
286 ++options;
287 }
288 } // while (*options != 0);
289 } // for (int i = 1; i < argc; ++i);
290
Nick Terrellac4310d2016-10-26 00:09:39 -0700291 // Set options for test mode
292 if (test) {
293 outputFile = nullOutput;
294 keepSource = true;
295 }
296
Nick Terrell254c5b12016-09-21 14:29:47 -0700297 // Input file defaults to standard input if not provided.
298 if (localInputFiles.empty()) {
299 localInputFiles.emplace_back(kStdIn);
300 }
301
302 // Check validity of input files
303 if (localInputFiles.size() > 1) {
304 const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
305 std::string{kStdIn});
306 if (it != localInputFiles.end()) {
307 std::fprintf(
308 stderr,
309 "Cannot specify standard input when handling multiple files\n");
310 return Status::Failure;
Nick Terrellc9325202016-09-01 15:22:19 -0700311 }
312 }
Nick Terrell254c5b12016-09-21 14:29:47 -0700313 if (localInputFiles.size() > 1 || recursive) {
314 if (!outputFile.empty() && outputFile != nullOutput) {
315 std::fprintf(
316 stderr,
317 "Cannot specify an output file when handling multiple inputs\n");
318 return Status::Failure;
319 }
320 }
321
Sean Purcell793e7bb2017-03-23 12:41:51 -0700322 g_utilDisplayLevel = verbosity;
323 // Remove local input files that are symbolic links
324 if (!followLinks) {
325 std::remove_if(localInputFiles.begin(), localInputFiles.end(),
326 [&](const char *path) {
327 bool isLink = UTIL_isLink(path);
328 if (isLink && verbosity >= 2) {
329 std::fprintf(
330 stderr,
331 "Warning : %s is symbolic link, ignoring\n",
332 path);
333 }
334 return isLink;
335 });
336 }
337
Nick Terrell254c5b12016-09-21 14:29:47 -0700338 // Translate input files/directories into files to (de)compress
339 if (recursive) {
Yann Collet31a0abb2019-11-06 09:10:05 -0800340 FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
Nick Terrell254c5b12016-09-21 14:29:47 -0700341 if (files == nullptr) {
342 std::fprintf(stderr, "Error traversing directories\n");
343 return Status::Failure;
344 }
345 auto guard =
Yann Collet31a0abb2019-11-06 09:10:05 -0800346 makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
347 if (files->tableSize == 0) {
Nick Terrell254c5b12016-09-21 14:29:47 -0700348 std::fprintf(stderr, "No files found\n");
349 return Status::Failure;
350 }
Yann Collet31a0abb2019-11-06 09:10:05 -0800351 inputFiles.resize(files->tableSize);
352 std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
Nick Terrell254c5b12016-09-21 14:29:47 -0700353 } else {
354 inputFiles.resize(localInputFiles.size());
355 std::copy(localInputFiles.begin(), localInputFiles.end(),
356 inputFiles.begin());
357 }
358 localInputFiles.clear();
359 assert(!inputFiles.empty());
360
361 // If reading from standard input, default to standard output
362 if (inputFiles[0] == kStdIn && outputFile.empty()) {
363 assert(inputFiles.size() == 1);
364 outputFile = "-";
365 }
366
367 if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
368 assert(inputFiles.size() == 1);
369 std::fprintf(stderr, "Cannot read input from interactive console\n");
370 return Status::Failure;
371 }
372 if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
373 std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
374 "specified and decompressing\n");
375 return Status::Failure;
376 }
377
Nick Terrellc9325202016-09-01 15:22:19 -0700378 // Check compression level
379 {
Nick Terrell254c5b12016-09-21 14:29:47 -0700380 unsigned maxCLevel =
381 ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
382 if (compressionLevel > maxCLevel || compressionLevel == 0) {
383 std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
384 return Status::Failure;
Nick Terrellc9325202016-09-01 15:22:19 -0700385 }
386 }
Nick Terrell254c5b12016-09-21 14:29:47 -0700387
Nick Terrellc9325202016-09-01 15:22:19 -0700388 // Check that numThreads is set
389 if (numThreads == 0) {
Nick Terrell254c5b12016-09-21 14:29:47 -0700390 std::fprintf(stderr, "Invalid arguments: # of threads not specified "
391 "and unable to determine hardware concurrency.\n");
392 return Status::Failure;
Nick Terrellc9325202016-09-01 15:22:19 -0700393 }
Nick Terrell254c5b12016-09-21 14:29:47 -0700394
395 // Modify verbosity
396 // If we are piping input and output, turn off interaction
397 if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
398 verbosity = 1;
399 }
400 // If we are in multi-file mode, turn off interaction
401 if (inputFiles.size() > 1 && verbosity == 2) {
402 verbosity = 1;
403 }
404
Nick Terrell254c5b12016-09-21 14:29:47 -0700405 return Status::Success;
406}
407
408std::string Options::getOutputFile(const std::string &inputFile) const {
409 if (!outputFile.empty()) {
410 return outputFile;
411 }
412 // Attempt to add/remove zstd extension from the input file
413 if (decompress) {
414 int stemSize = inputFile.size() - kZstdExtension.size();
415 if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) {
416 return inputFile.substr(0, stemSize);
417 } else {
418 return "";
419 }
420 } else {
421 return inputFile + kZstdExtension;
422 }
Nick Terrellc9325202016-09-01 15:22:19 -0700423}
424}