alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 4 | #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ |
| 5 | #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ |
| 6 | |
avi | aa96948 | 2015-12-27 13:36:49 -0800 | [diff] [blame] | 7 | #include <stddef.h> |
| 8 | #include <stdint.h> |
| 9 | |
dcheng | a90aed5 | 2016-04-22 16:49:07 -0700 | [diff] [blame] | 10 | #include <memory> |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 11 | #include <string> |
| 12 | |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 13 | #include "base/callback.h" |
rvargas@chromium.org | 0d73765 | 2014-02-27 05:58:13 +0000 | [diff] [blame] | 14 | #include "base/files/file.h" |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 15 | #include "base/files/file_path.h" |
thestig | f698159 | 2014-09-22 12:06:21 -0700 | [diff] [blame] | 16 | #include "base/files/file_util.h" |
avi | aa96948 | 2015-12-27 13:36:49 -0800 | [diff] [blame] | 17 | #include "base/macros.h" |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 18 | #include "base/memory/weak_ptr.h" |
avi@chromium.org | 47f1b55 | 2013-06-28 15:23:55 +0000 | [diff] [blame] | 19 | #include "base/time/time.h" |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 20 | |
| 21 | #if defined(USE_SYSTEM_MINIZIP) |
| 22 | #include <minizip/unzip.h> |
| 23 | #else |
| 24 | #include "third_party/zlib/contrib/minizip/unzip.h" |
| 25 | #endif |
| 26 | |
| 27 | namespace zip { |
| 28 | |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 29 | // A delegate interface used to stream out an entry; see |
| 30 | // ZipReader::ExtractCurrentEntry. |
| 31 | class WriterDelegate { |
| 32 | public: |
| 33 | virtual ~WriterDelegate() {} |
| 34 | |
| 35 | // Invoked once before any data is streamed out to pave the way (e.g., to open |
| 36 | // the output file). Return false on failure to cancel extraction. |
| 37 | virtual bool PrepareOutput() = 0; |
| 38 | |
| 39 | // Invoked to write the next chunk of data. Return false on failure to cancel |
| 40 | // extraction. |
| 41 | virtual bool WriteBytes(const char* data, int num_bytes) = 0; |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 42 | |
| 43 | // Sets the last-modified time of the data. |
| 44 | virtual void SetTimeModified(const base::Time& time) = 0; |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 45 | }; |
| 46 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 47 | // This class is used for reading zip files. A typical use case of this |
| 48 | // class is to scan entries in a zip file and extract them. The code will |
| 49 | // look like: |
| 50 | // |
| 51 | // ZipReader reader; |
| 52 | // reader.Open(zip_file_path); |
| 53 | // while (reader.HasMore()) { |
| 54 | // reader.OpenCurrentEntryInZip(); |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 55 | // const base::FilePath& entry_path = |
| 56 | // reader.current_entry_info()->file_path(); |
| 57 | // auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path); |
| 58 | // reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max()); |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 59 | // reader.AdvanceToNextEntry(); |
| 60 | // } |
| 61 | // |
joaoe@opera.com | 0002429 | 2014-06-20 18:12:13 +0000 | [diff] [blame] | 62 | // For simplicity, error checking is omitted in the example code above. The |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 63 | // production code should check return values from all of these functions. |
| 64 | // |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 65 | class ZipReader { |
| 66 | public: |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 67 | // A callback that is called when the operation is successful. |
Sylvain Defresne | ddebad2 | 2019-10-01 15:40:56 +0000 | [diff] [blame] | 68 | using SuccessCallback = base::OnceClosure; |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 69 | // A callback that is called when the operation fails. |
Sylvain Defresne | ddebad2 | 2019-10-01 15:40:56 +0000 | [diff] [blame] | 70 | using FailureCallback = base::OnceClosure; |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 71 | // A callback that is called periodically during the operation with the number |
| 72 | // of bytes that have been processed so far. |
Sylvain Defresne | ddebad2 | 2019-10-01 15:40:56 +0000 | [diff] [blame] | 73 | using ProgressCallback = base::RepeatingCallback<void(int64_t)>; |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 74 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 75 | // This class represents information of an entry (file or directory) in |
| 76 | // a zip file. |
| 77 | class EntryInfo { |
| 78 | public: |
| 79 | EntryInfo(const std::string& filename_in_zip, |
| 80 | const unz_file_info& raw_file_info); |
| 81 | |
| 82 | // Returns the file path. The path is usually relative like |
| 83 | // "foo/bar.txt", but if it's absolute, is_unsafe() returns true. |
| 84 | const base::FilePath& file_path() const { return file_path_; } |
| 85 | |
| 86 | // Returns the size of the original file (i.e. after uncompressed). |
| 87 | // Returns 0 if the entry is a directory. |
joaoe@opera.com | 7e84479 | 2014-05-07 20:53:02 +0000 | [diff] [blame] | 88 | // Note: this value should not be trusted, because it is stored as metadata |
| 89 | // in the zip archive and can be different from the real uncompressed size. |
avi | aa96948 | 2015-12-27 13:36:49 -0800 | [diff] [blame] | 90 | int64_t original_size() const { return original_size_; } |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 91 | |
joaoe@opera.com | ce54c1b | 2013-12-17 14:40:31 +0000 | [diff] [blame] | 92 | // Returns the last modified time. If the time stored in the zip file was |
| 93 | // not valid, the unix epoch will be returned. |
| 94 | // |
| 95 | // The time stored in the zip archive uses the MS-DOS date and time format. |
| 96 | // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx |
| 97 | // As such the following limitations apply: |
| 98 | // * only years from 1980 to 2107 can be represented. |
| 99 | // * the time stamp has a 2 second resolution. |
| 100 | // * there's no timezone information, so the time is interpreted as local. |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 101 | base::Time last_modified() const { return last_modified_; } |
| 102 | |
| 103 | // Returns true if the entry is a directory. |
| 104 | bool is_directory() const { return is_directory_; } |
| 105 | |
| 106 | // Returns true if the entry is unsafe, like having ".." or invalid |
| 107 | // UTF-8 characters in its file name, or the file path is absolute. |
| 108 | bool is_unsafe() const { return is_unsafe_; } |
| 109 | |
Daniel Rubery | ad6f586 | 2018-08-22 16:53:41 +0000 | [diff] [blame] | 110 | // Returns true if the entry is encrypted. |
| 111 | bool is_encrypted() const { return is_encrypted_; } |
| 112 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 113 | private: |
| 114 | const base::FilePath file_path_; |
avi | aa96948 | 2015-12-27 13:36:49 -0800 | [diff] [blame] | 115 | int64_t original_size_; |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 116 | base::Time last_modified_; |
| 117 | bool is_directory_; |
| 118 | bool is_unsafe_; |
Daniel Rubery | ad6f586 | 2018-08-22 16:53:41 +0000 | [diff] [blame] | 119 | bool is_encrypted_; |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 120 | DISALLOW_COPY_AND_ASSIGN(EntryInfo); |
| 121 | }; |
| 122 | |
| 123 | ZipReader(); |
| 124 | ~ZipReader(); |
| 125 | |
| 126 | // Opens the zip file specified by |zip_file_path|. Returns true on |
| 127 | // success. |
| 128 | bool Open(const base::FilePath& zip_file_path); |
| 129 | |
jeremysspiegel | a6bba37 | 2014-11-19 15:53:16 -0800 | [diff] [blame] | 130 | // Opens the zip file referred to by the platform file |zip_fd|, without |
| 131 | // taking ownership of |zip_fd|. Returns true on success. |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 132 | bool OpenFromPlatformFile(base::PlatformFile zip_fd); |
| 133 | |
| 134 | // Opens the zip data stored in |data|. This class uses a weak reference to |
| 135 | // the given sring while extracting files, i.e. the caller should keep the |
| 136 | // string until it finishes extracting files. |
| 137 | bool OpenFromString(const std::string& data); |
| 138 | |
| 139 | // Closes the currently opened zip file. This function is called in the |
| 140 | // destructor of the class, so you usually don't need to call this. |
| 141 | void Close(); |
| 142 | |
| 143 | // Returns true if there is at least one entry to read. This function is |
| 144 | // used to scan entries with AdvanceToNextEntry(), like: |
| 145 | // |
| 146 | // while (reader.HasMore()) { |
| 147 | // // Do something with the current file here. |
| 148 | // reader.AdvanceToNextEntry(); |
| 149 | // } |
| 150 | bool HasMore(); |
| 151 | |
| 152 | // Advances the next entry. Returns true on success. |
| 153 | bool AdvanceToNextEntry(); |
| 154 | |
| 155 | // Opens the current entry in the zip file. On success, returns true and |
| 156 | // updates the the current entry state (i.e. current_entry_info() is |
| 157 | // updated). This function should be called before operations over the |
| 158 | // current entry like ExtractCurrentEntryToFile(). |
| 159 | // |
| 160 | // Note that there is no CloseCurrentEntryInZip(). The the current entry |
| 161 | // state is reset automatically as needed. |
| 162 | bool OpenCurrentEntryInZip(); |
| 163 | |
mortonm | b4298b0 | 2017-08-04 07:57:41 -0700 | [diff] [blame] | 164 | // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, |
| 165 | // starting from the beginning of the entry. Return value specifies whether |
| 166 | // the entire file was extracted. |
| 167 | bool ExtractCurrentEntry(WriterDelegate* delegate, |
| 168 | uint64_t num_bytes_to_extract) const; |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 169 | |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 170 | // Asynchronously extracts the current entry to the given output file path. |
| 171 | // If the current entry is a directory it just creates the directory |
| 172 | // synchronously instead. OpenCurrentEntryInZip() must be called beforehand. |
| 173 | // success_callback will be called on success and failure_callback will be |
| 174 | // called on failure. progress_callback will be called at least once. |
| 175 | // Callbacks will be posted to the current MessageLoop in-order. |
| 176 | void ExtractCurrentEntryToFilePathAsync( |
| 177 | const base::FilePath& output_file_path, |
Sylvain Defresne | ddebad2 | 2019-10-01 15:40:56 +0000 | [diff] [blame] | 178 | SuccessCallback success_callback, |
| 179 | FailureCallback failure_callback, |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 180 | const ProgressCallback& progress_callback); |
| 181 | |
mortonm | b4298b0 | 2017-08-04 07:57:41 -0700 | [diff] [blame] | 182 | // Extracts the current entry into memory. If the current entry is a |
| 183 | // directory, the |output| parameter is set to the empty string. If the |
| 184 | // current entry is a file, the |output| parameter is filled with its |
| 185 | // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the |
| 186 | // |output| parameter can be filled with a big amount of data, avoid passing |
| 187 | // it around by value, but by reference or pointer. Note: the value returned |
| 188 | // by EntryInfo::original_size() cannot be trusted, so the real size of the |
| 189 | // uncompressed contents can be different. |max_read_bytes| limits the ammount |
| 190 | // of memory used to carry the entry. Returns true if the entire content is |
| 191 | // read. If the entry is bigger than |max_read_bytes|, returns false and |
| 192 | // |output| is filled with |max_read_bytes| of data. If an error occurs, |
| 193 | // returns false, and |output| is set to the empty string. |
| 194 | bool ExtractCurrentEntryToString(uint64_t max_read_bytes, |
| 195 | std::string* output) const; |
joaoe@opera.com | 0002429 | 2014-06-20 18:12:13 +0000 | [diff] [blame] | 196 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 197 | // Returns the current entry info. Returns NULL if the current entry is |
| 198 | // not yet opened. OpenCurrentEntryInZip() must be called beforehand. |
| 199 | EntryInfo* current_entry_info() const { |
| 200 | return current_entry_info_.get(); |
| 201 | } |
| 202 | |
| 203 | // Returns the number of entries in the zip file. |
| 204 | // Open() must be called beforehand. |
| 205 | int num_entries() const { return num_entries_; } |
| 206 | |
| 207 | private: |
| 208 | // Common code used both in Open and OpenFromFd. |
| 209 | bool OpenInternal(); |
| 210 | |
| 211 | // Resets the internal state. |
| 212 | void Reset(); |
| 213 | |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 214 | // Extracts a chunk of the file to the target. Will post a task for the next |
| 215 | // chunk and success/failure/progress callbacks as necessary. |
rvargas@chromium.org | 0d73765 | 2014-02-27 05:58:13 +0000 | [diff] [blame] | 216 | void ExtractChunk(base::File target_file, |
Sylvain Defresne | ddebad2 | 2019-10-01 15:40:56 +0000 | [diff] [blame] | 217 | SuccessCallback success_callback, |
| 218 | FailureCallback failure_callback, |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 219 | const ProgressCallback& progress_callback, |
avi | aa96948 | 2015-12-27 13:36:49 -0800 | [diff] [blame] | 220 | const int64_t offset); |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 221 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 222 | unzFile zip_file_; |
| 223 | int num_entries_; |
| 224 | bool reached_end_; |
dcheng | a90aed5 | 2016-04-22 16:49:07 -0700 | [diff] [blame] | 225 | std::unique_ptr<EntryInfo> current_entry_info_; |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 226 | |
Jeremy Roman | 2610552 | 2019-08-12 15:45:27 +0000 | [diff] [blame] | 227 | base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; |
haven@chromium.org | 84ed265 | 2014-01-17 00:36:28 +0000 | [diff] [blame] | 228 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 229 | DISALLOW_COPY_AND_ASSIGN(ZipReader); |
| 230 | }; |
| 231 | |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 232 | // A writer delegate that writes to a given File. |
| 233 | class FileWriterDelegate : public WriterDelegate { |
| 234 | public: |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 235 | // Constructs a FileWriterDelegate that manipulates |file|. The delegate will |
| 236 | // not own |file|, therefore the caller must guarantee |file| will outlive the |
| 237 | // delegate. |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 238 | explicit FileWriterDelegate(base::File* file); |
| 239 | |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 240 | // Constructs a FileWriterDelegate that takes ownership of |file|. |
| 241 | explicit FileWriterDelegate(std::unique_ptr<base::File> file); |
| 242 | |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 243 | // Truncates the file to the number of bytes written. |
| 244 | ~FileWriterDelegate() override; |
| 245 | |
| 246 | // WriterDelegate methods: |
| 247 | |
| 248 | // Seeks to the beginning of the file, returning false if the seek fails. |
| 249 | bool PrepareOutput() override; |
| 250 | |
| 251 | // Writes |num_bytes| bytes of |data| to the file, returning false on error or |
| 252 | // if not all bytes could be written. |
| 253 | bool WriteBytes(const char* data, int num_bytes) override; |
| 254 | |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 255 | // Sets the last-modified time of the data. |
| 256 | void SetTimeModified(const base::Time& time) override; |
| 257 | |
Daniel Rubery | bb293c3 | 2019-03-29 16:42:36 +0000 | [diff] [blame] | 258 | // Return the actual size of the file. |
| 259 | int64_t file_length() { return file_length_; } |
| 260 | |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 261 | private: |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 262 | // The file the delegate modifies. |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 263 | base::File* file_; |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 264 | |
| 265 | // The delegate can optionally own the file it modifies, in which case |
| 266 | // owned_file_ is set and file_ is an alias for owned_file_. |
| 267 | std::unique_ptr<base::File> owned_file_; |
| 268 | |
| 269 | int64_t file_length_ = 0; |
grt | ebc765a | 2015-03-18 14:22:34 -0700 | [diff] [blame] | 270 | |
| 271 | DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate); |
| 272 | }; |
| 273 | |
Joshua Pawlicki | e31b503 | 2018-02-06 20:24:51 +0000 | [diff] [blame] | 274 | // A writer delegate that writes a file at a given path. |
| 275 | class FilePathWriterDelegate : public WriterDelegate { |
| 276 | public: |
| 277 | explicit FilePathWriterDelegate(const base::FilePath& output_file_path); |
| 278 | ~FilePathWriterDelegate() override; |
| 279 | |
| 280 | // WriterDelegate methods: |
| 281 | |
| 282 | // Creates the output file and any necessary intermediate directories. |
| 283 | bool PrepareOutput() override; |
| 284 | |
| 285 | // Writes |num_bytes| bytes of |data| to the file, returning false if not all |
| 286 | // bytes could be written. |
| 287 | bool WriteBytes(const char* data, int num_bytes) override; |
| 288 | |
| 289 | // Sets the last-modified time of the data. |
| 290 | void SetTimeModified(const base::Time& time) override; |
| 291 | |
| 292 | private: |
| 293 | base::FilePath output_file_path_; |
| 294 | base::File file_; |
| 295 | |
| 296 | DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate); |
| 297 | }; |
| 298 | |
alecflett@chromium.org | d6d082e | 2013-05-03 23:02:57 +0000 | [diff] [blame] | 299 | } // namespace zip |
| 300 | |
| 301 | #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ |