Yann Collet | 394bdd7 | 2017-08-29 09:24:11 -0700 | [diff] [blame] | 1 | /* |
W. Felix Handte | 5d693cc | 2022-12-20 12:49:47 -0500 | [diff] [blame] | 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
Yann Collet | 4ded9e5 | 2016-08-30 10:04:33 -0700 | [diff] [blame] | 3 | * All rights reserved. |
| 4 | * |
Yann Collet | 394bdd7 | 2017-08-29 09:24:11 -0700 | [diff] [blame] | 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
Yann Collet | 3128e03 | 2017-09-08 00:09:23 -0700 | [diff] [blame] | 8 | * You may select, at your option, one of the above-listed licenses. |
Yann Collet | 4ded9e5 | 2016-08-30 10:04:33 -0700 | [diff] [blame] | 9 | */ |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 10 | |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 11 | |
#include <errno.h>     // errno, ERANGE
#include <limits.h>    // INT_MIN, INT_MAX
#include <stdio.h>     // printf
#include <stdlib.h>    // free
#include <string.h>    // memset, strcat, strlen
#include <zstd.h>      // presumes zstd library is installed
#include "common.h"    // Helper functions, CHECK(), and CHECK_ZSTD()
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 17 | |
Martin Liska | 926d470 | 2021-10-04 08:23:57 +0200 | [diff] [blame] | 18 | static void compressFile_orDie(const char* fname, const char* outName, int cLevel, |
| 19 | int nbThreads) |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 20 | { |
Martin Liska | 926d470 | 2021-10-04 08:23:57 +0200 | [diff] [blame] | 21 | fprintf (stderr, "Starting compression of %s with level %d, using %d threads\n", |
| 22 | fname, cLevel, nbThreads); |
| 23 | |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 24 | /* Open the input and output files. */ |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 25 | FILE* const fin = fopen_orDie(fname, "rb"); |
| 26 | FILE* const fout = fopen_orDie(outName, "wb"); |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 27 | /* Create the input and output buffers. |
| 28 | * They may be any size, but we recommend using these functions to size them. |
| 29 | * Performance will only suffer significantly for very tiny buffers. |
| 30 | */ |
| 31 | size_t const buffInSize = ZSTD_CStreamInSize(); |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 32 | void* const buffIn = malloc_orDie(buffInSize); |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 33 | size_t const buffOutSize = ZSTD_CStreamOutSize(); |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 34 | void* const buffOut = malloc_orDie(buffOutSize); |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 35 | |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 36 | /* Create the context. */ |
| 37 | ZSTD_CCtx* const cctx = ZSTD_createCCtx(); |
| 38 | CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 39 | |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 40 | /* Set any parameters you want. |
| 41 | * Here we set the compression level, and enable the checksum. |
| 42 | */ |
| 43 | CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) ); |
| 44 | CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); |
Yann Collet | 6ec18ae | 2023-04-26 12:45:23 -0700 | [diff] [blame] | 45 | if (nbThreads > 1) { |
| 46 | size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads); |
| 47 | if (ZSTD_isError(r)) { |
| 48 | fprintf (stderr, "Note: the linked libzstd library doesn't support multithreading. " |
| 49 | "Reverting to single-thread mode. \n"); |
| 50 | } |
| 51 | } |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 52 | |
| 53 | /* This loop read from the input file, compresses that entire chunk, |
| 54 | * and writes all output produced to the output file. |
| 55 | */ |
| 56 | size_t const toRead = buffInSize; |
Jan Kasiak | a821990 | 2019-09-01 15:35:53 -0400 | [diff] [blame] | 57 | for (;;) { |
| 58 | size_t read = fread_orDie(buffIn, toRead, fin); |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 59 | /* Select the flush mode. |
| 60 | * If the read may not be finished (read == toRead) we use |
| 61 | * ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end. |
| 62 | * Zstd optimizes the case where the first flush mode is ZSTD_e_end, |
| 63 | * since it knows it is compressing the entire source in one pass. |
| 64 | */ |
| 65 | int const lastChunk = (read < toRead); |
| 66 | ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; |
| 67 | /* Set the input buffer to what we just read. |
| 68 | * We compress until the input buffer is empty, each time flushing the |
| 69 | * output. |
| 70 | */ |
Yann Collet | 2065879 | 2016-08-17 01:48:43 +0200 | [diff] [blame] | 71 | ZSTD_inBuffer input = { buffIn, read, 0 }; |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 72 | int finished; |
| 73 | do { |
| 74 | /* Compress into the output buffer and write all of the output to |
| 75 | * the file so we can reuse the buffer next iteration. |
| 76 | */ |
Yann Collet | 2065879 | 2016-08-17 01:48:43 +0200 | [diff] [blame] | 77 | ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 78 | size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode); |
| 79 | CHECK_ZSTD(remaining); |
Yann Collet | 2065879 | 2016-08-17 01:48:43 +0200 | [diff] [blame] | 80 | fwrite_orDie(buffOut, output.pos, fout); |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 81 | /* If we're on the last chunk we're finished when zstd returns 0, |
| 82 | * which means its consumed all the input AND finished the frame. |
| 83 | * Otherwise, we're finished when we've consumed all the input. |
| 84 | */ |
| 85 | finished = lastChunk ? (remaining == 0) : (input.pos == input.size); |
| 86 | } while (!finished); |
Nick Terrell | 1d0c170 | 2019-04-05 18:11:17 -0700 | [diff] [blame] | 87 | CHECK(input.pos == input.size, |
| 88 | "Impossible: zstd only returns 0 when the input is completely consumed!"); |
Jan Kasiak | a821990 | 2019-09-01 15:35:53 -0400 | [diff] [blame] | 89 | |
| 90 | if (lastChunk) { |
| 91 | break; |
| 92 | } |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 93 | } |
| 94 | |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 95 | ZSTD_freeCCtx(cctx); |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 96 | fclose_orDie(fout); |
Nick Terrell | f5cbee9 | 2019-03-22 14:57:23 -0700 | [diff] [blame] | 97 | fclose_orDie(fin); |
| 98 | free(buffIn); |
Yann Collet | 553b213 | 2016-08-12 18:42:25 +0200 | [diff] [blame] | 99 | free(buffOut); |
| 100 | } |
| 101 | |
| 102 | |
/* createOutFilename_orDie() :
 * Builds the output file name by appending ".zst" to `filename`.
 * @return : a buffer obtained from malloc_orDie(), holding the
 *           null-terminated result. The caller is responsible for free()-ing it.
 */
static char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";   /* sizeof(suffix) counts the final '\0' */
    size_t const nameLen = strlen(filename);
    size_t const totalSize = nameLen + 5;   /* name + ".zst" + '\0' */
    char* const result = (char*)malloc_orDie(totalSize);
    memcpy(result, filename, nameLen);
    memcpy(result + nameLen, suffix, sizeof(suffix));
    return result;
}
| 113 | |
/* readIntArg() :
 * Converts the whole string `str` into an int, in base 10.
 * Unlike atoi(), it distinguishes a legitimate "0" from a parsing failure,
 * refuses trailing non-numeric characters, and detects out-of-range values.
 * @return : 1 on success, with the result stored into `*value`;
 *           0 on failure, in which case `*value` is left untouched.
 */
static int readIntArg(const char* str, int* value)
{
    char* end = NULL;
    errno = 0;
    long const parsed = strtol(str, &end, 10);
    if (end == str || *end != '\0') return 0;   /* no digit at all, or trailing garbage */
    if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX) return 0;   /* doesn't fit an int */
    *value = (int)parsed;
    return 1;
}

/* main() :
 * Usage : exe FILE [LEVEL] [THREADS]
 * Compresses FILE into FILE.zst.
 * LEVEL defaults to 1 and THREADS defaults to 1 when not provided.
 * @return : 0 on completion, 1 when FILE is missing from the command line.
 *           Invalid LEVEL or THREADS values are reported through CHECK().
 */
int main(int argc, const char** argv)
{
    const char* const exeName = argv[0];

    if (argc < 2) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE [LEVEL] [THREADS]\n", exeName);
        return 1;
    }

    int cLevel = 1;
    int nbThreads = 1;

    if (argc >= 3) {
        /* Any integer is accepted here, including 0 and negative values :
         * the value is forwarded as-is to the compression context. */
        CHECK(readIntArg(argv[2], &cLevel), "can't parse LEVEL!");
    }

    if (argc >= 4) {
        CHECK(readIntArg(argv[3], &nbThreads), "can't parse THREADS!");
        CHECK(nbThreads >= 1, "THREADS must be at least 1!");
    }

    const char* const inFilename = argv[1];

    char* const outFilename = createOutFilename_orDie(inFilename);
    compressFile_orDie(inFilename, outFilename, cLevel, nbThreads);

    free(outFilename);   /* not strictly required, since program execution stops there,
                          * but some static analyzer may complain otherwise */
    return 0;
}