Yann Collet | e21384f | 2017-08-31 12:11:57 -0700 | [diff] [blame] | 1 | /* |
Elliott Hughes | 44aba64 | 2023-09-12 20:18:59 +0000 | [diff] [blame] | 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 3 | * All rights reserved. |
| 4 | * |
Yann Collet | e21384f | 2017-08-31 12:11:57 -0700 | [diff] [blame] | 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 8 | */ |
| 9 | #pragma once |
| 10 | |
| 11 | #include "ErrorHolder.h" |
Nick Terrell | baa152e | 2016-10-12 19:02:27 -0700 | [diff] [blame] | 12 | #include "Logging.h" |
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 13 | #include "Options.h" |
| 14 | #include "utils/Buffer.h" |
| 15 | #include "utils/Range.h" |
Nick Terrell | 48294b5 | 2016-10-12 15:18:16 -0700 | [diff] [blame] | 16 | #include "utils/ResourcePool.h" |
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 17 | #include "utils/ThreadPool.h" |
| 18 | #include "utils/WorkQueue.h" |
| 19 | #define ZSTD_STATIC_LINKING_ONLY |
sen | 40def70 | 2021-05-13 14:41:21 -0400 | [diff] [blame] | 20 | #define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated |
| 21 | * and uses deprecated functions |
| 22 | */ |
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 23 | #include "zstd.h" |
| 24 | #undef ZSTD_STATIC_LINKING_ONLY |
| 25 | |
| 26 | #include <cstddef> |
Nick Terrell | 823bf3d | 2016-09-06 20:11:02 -0700 | [diff] [blame] | 27 | #include <cstdint> |
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 28 | #include <memory> |
| 29 | |
| 30 | namespace pzstd { |
/**
 * Runs pzstd with `options`.
 *
 * Success or failure is reported through the integer return value.
 *
 * @param options   The pzstd options to use for (de)compression
 * @returns         0 upon success and non-zero on failure.
 */
int pzstdMain(const Options& options);
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 39 | |
Nick Terrell | 48294b5 | 2016-10-12 15:18:16 -0700 | [diff] [blame] | 40 | class SharedState { |
| 41 | public: |
Nick Terrell | baa152e | 2016-10-12 19:02:27 -0700 | [diff] [blame] | 42 | SharedState(const Options& options) : log(options.verbosity) { |
| 43 | if (!options.decompress) { |
| 44 | auto parameters = options.determineParameters(); |
Nick Terrell | e9e151c | 2016-10-12 17:23:38 -0700 | [diff] [blame] | 45 | cStreamPool.reset(new ResourcePool<ZSTD_CStream>{ |
Nick Terrell | f147fcc | 2016-11-15 16:39:09 -0800 | [diff] [blame] | 46 | [this, parameters]() -> ZSTD_CStream* { |
Nick Terrell | 5aa5aa4 | 2020-05-22 22:26:02 -0700 | [diff] [blame] | 47 | this->log(kLogVerbose, "%s\n", "Creating new ZSTD_CStream"); |
Nick Terrell | e9e151c | 2016-10-12 17:23:38 -0700 | [diff] [blame] | 48 | auto zcs = ZSTD_createCStream(); |
| 49 | if (zcs) { |
| 50 | auto err = ZSTD_initCStream_advanced( |
| 51 | zcs, nullptr, 0, parameters, 0); |
| 52 | if (ZSTD_isError(err)) { |
| 53 | ZSTD_freeCStream(zcs); |
| 54 | return nullptr; |
| 55 | } |
| 56 | } |
| 57 | return zcs; |
| 58 | }, |
| 59 | [](ZSTD_CStream *zcs) { |
| 60 | ZSTD_freeCStream(zcs); |
| 61 | }}); |
| 62 | } else { |
| 63 | dStreamPool.reset(new ResourcePool<ZSTD_DStream>{ |
Nick Terrell | f147fcc | 2016-11-15 16:39:09 -0800 | [diff] [blame] | 64 | [this]() -> ZSTD_DStream* { |
Nick Terrell | 5aa5aa4 | 2020-05-22 22:26:02 -0700 | [diff] [blame] | 65 | this->log(kLogVerbose, "%s\n", "Creating new ZSTD_DStream"); |
Nick Terrell | e9e151c | 2016-10-12 17:23:38 -0700 | [diff] [blame] | 66 | auto zds = ZSTD_createDStream(); |
| 67 | if (zds) { |
| 68 | auto err = ZSTD_initDStream(zds); |
| 69 | if (ZSTD_isError(err)) { |
| 70 | ZSTD_freeDStream(zds); |
| 71 | return nullptr; |
| 72 | } |
| 73 | } |
| 74 | return zds; |
| 75 | }, |
| 76 | [](ZSTD_DStream *zds) { |
| 77 | ZSTD_freeDStream(zds); |
| 78 | }}); |
| 79 | } |
| 80 | } |
| 81 | |
Nick Terrell | f147fcc | 2016-11-15 16:39:09 -0800 | [diff] [blame] | 82 | ~SharedState() { |
| 83 | // The resource pools have references to this, so destroy them first. |
| 84 | cStreamPool.reset(); |
| 85 | dStreamPool.reset(); |
| 86 | } |
| 87 | |
Nick Terrell | baa152e | 2016-10-12 19:02:27 -0700 | [diff] [blame] | 88 | Logger log; |
Nick Terrell | 48294b5 | 2016-10-12 15:18:16 -0700 | [diff] [blame] | 89 | ErrorHolder errorHolder; |
Nick Terrell | e9e151c | 2016-10-12 17:23:38 -0700 | [diff] [blame] | 90 | std::unique_ptr<ResourcePool<ZSTD_CStream>> cStreamPool; |
| 91 | std::unique_ptr<ResourcePool<ZSTD_DStream>> dStreamPool; |
Nick Terrell | 48294b5 | 2016-10-12 15:18:16 -0700 | [diff] [blame] | 92 | }; |
| 93 | |
/**
 * Streams input from `fd`, breaks the input up into chunks, and compresses
 * each chunk independently.  The output of each chunk is streamed to a queue,
 * and the output queues are put into `chunks` in order.
 *
 * @param state         The shared state
 * @param chunks        Each compression job's output queue gets pushed here
 *                      as soon as it is available
 * @param executor      The thread pool to run compression jobs in
 * @param fd            The input file stream (`FILE*`)
 * @param size          The size of the input file if known, 0 otherwise
 * @param numThreads    The number of threads in the thread pool
 * @param parameters    The zstd parameters to use for compression
 * @returns             The number of bytes read from the file
 */
std::uint64_t asyncCompressChunks(
    SharedState& state,
    WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
    ThreadPool& executor,
    FILE* fd,
    std::uintmax_t size,
    std::size_t numThreads,
    ZSTD_parameters parameters);
| 117 | |
/**
 * Streams input from `fd`.  If pzstd headers are available, it breaks the
 * input up into independent frames and sends each frame to its own
 * decompression job.  The output of each frame is streamed to a queue, and
 * the output queues are put into `frames` in order.
 *
 * @param state         The shared state
 * @param frames        Each decompression job's output queue gets pushed here
 *                      as soon as it is available
 * @param executor      The thread pool to run decompression jobs in
 * @param fd            The input file stream (`FILE*`)
 * @returns             The number of bytes read from the file
 */
std::uint64_t asyncDecompressFrames(
    SharedState& state,
    WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
    ThreadPool& executor,
    FILE* fd);
| 136 | |
/**
 * Reads the data from each queue in `outs`, in order, and writes it to
 * `outputFd`.
 *
 * @param state         The shared state
 * @param outs          A queue of output queues, one for each
 *                      (de)compression job.
 * @param outputFd      The output file stream (`FILE*`) to write to
 * @param decompress    Are we decompressing?
 * @returns             The number of bytes written
 */
std::uint64_t writeFile(
    SharedState& state,
    WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
    FILE* outputFd,
    bool decompress);
Nick Terrell | c932520 | 2016-09-01 15:22:19 -0700 | [diff] [blame] | 153 | } |