// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +00004#ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5#define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6
aviaa969482015-12-27 13:36:49 -08007#include <stddef.h>
8#include <stdint.h>
9
dchenga90aed52016-04-22 16:49:07 -070010#include <memory>
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000011#include <string>
12
haven@chromium.org84ed2652014-01-17 00:36:28 +000013#include "base/callback.h"
rvargas@chromium.org0d737652014-02-27 05:58:13 +000014#include "base/files/file.h"
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000015#include "base/files/file_path.h"
thestigf6981592014-09-22 12:06:21 -070016#include "base/files/file_util.h"
aviaa969482015-12-27 13:36:49 -080017#include "base/macros.h"
haven@chromium.org84ed2652014-01-17 00:36:28 +000018#include "base/memory/weak_ptr.h"
avi@chromium.org47f1b552013-06-28 15:23:55 +000019#include "base/time/time.h"
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000020
21#if defined(USE_SYSTEM_MINIZIP)
22#include <minizip/unzip.h>
23#else
24#include "third_party/zlib/contrib/minizip/unzip.h"
25#endif
26
27namespace zip {
28
grtebc765a2015-03-18 14:22:34 -070029// A delegate interface used to stream out an entry; see
30// ZipReader::ExtractCurrentEntry.
31class WriterDelegate {
32 public:
33 virtual ~WriterDelegate() {}
34
35 // Invoked once before any data is streamed out to pave the way (e.g., to open
36 // the output file). Return false on failure to cancel extraction.
37 virtual bool PrepareOutput() = 0;
38
39 // Invoked to write the next chunk of data. Return false on failure to cancel
40 // extraction.
41 virtual bool WriteBytes(const char* data, int num_bytes) = 0;
Joshua Pawlickie31b5032018-02-06 20:24:51 +000042
43 // Sets the last-modified time of the data.
44 virtual void SetTimeModified(const base::Time& time) = 0;
grtebc765a2015-03-18 14:22:34 -070045};
46
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000047// This class is used for reading zip files. A typical use case of this
48// class is to scan entries in a zip file and extract them. The code will
49// look like:
50//
51// ZipReader reader;
52// reader.Open(zip_file_path);
53// while (reader.HasMore()) {
54// reader.OpenCurrentEntryInZip();
Joshua Pawlickie31b5032018-02-06 20:24:51 +000055// const base::FilePath& entry_path =
56// reader.current_entry_info()->file_path();
57// auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path);
58// reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max());
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000059// reader.AdvanceToNextEntry();
60// }
61//
joaoe@opera.com00024292014-06-20 18:12:13 +000062// For simplicity, error checking is omitted in the example code above. The
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000063// production code should check return values from all of these functions.
64//
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000065class ZipReader {
66 public:
haven@chromium.org84ed2652014-01-17 00:36:28 +000067 // A callback that is called when the operation is successful.
Sylvain Defresneddebad22019-10-01 15:40:56 +000068 using SuccessCallback = base::OnceClosure;
haven@chromium.org84ed2652014-01-17 00:36:28 +000069 // A callback that is called when the operation fails.
Sylvain Defresneddebad22019-10-01 15:40:56 +000070 using FailureCallback = base::OnceClosure;
haven@chromium.org84ed2652014-01-17 00:36:28 +000071 // A callback that is called periodically during the operation with the number
72 // of bytes that have been processed so far.
Sylvain Defresneddebad22019-10-01 15:40:56 +000073 using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
haven@chromium.org84ed2652014-01-17 00:36:28 +000074
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000075 // This class represents information of an entry (file or directory) in
76 // a zip file.
77 class EntryInfo {
78 public:
79 EntryInfo(const std::string& filename_in_zip,
80 const unz_file_info& raw_file_info);
81
82 // Returns the file path. The path is usually relative like
83 // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
84 const base::FilePath& file_path() const { return file_path_; }
85
86 // Returns the size of the original file (i.e. after uncompressed).
87 // Returns 0 if the entry is a directory.
joaoe@opera.com7e844792014-05-07 20:53:02 +000088 // Note: this value should not be trusted, because it is stored as metadata
89 // in the zip archive and can be different from the real uncompressed size.
aviaa969482015-12-27 13:36:49 -080090 int64_t original_size() const { return original_size_; }
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +000091
joaoe@opera.comce54c1b2013-12-17 14:40:31 +000092 // Returns the last modified time. If the time stored in the zip file was
93 // not valid, the unix epoch will be returned.
94 //
95 // The time stored in the zip archive uses the MS-DOS date and time format.
96 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
97 // As such the following limitations apply:
98 // * only years from 1980 to 2107 can be represented.
99 // * the time stamp has a 2 second resolution.
100 // * there's no timezone information, so the time is interpreted as local.
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000101 base::Time last_modified() const { return last_modified_; }
102
103 // Returns true if the entry is a directory.
104 bool is_directory() const { return is_directory_; }
105
106 // Returns true if the entry is unsafe, like having ".." or invalid
107 // UTF-8 characters in its file name, or the file path is absolute.
108 bool is_unsafe() const { return is_unsafe_; }
109
Daniel Ruberyad6f5862018-08-22 16:53:41 +0000110 // Returns true if the entry is encrypted.
111 bool is_encrypted() const { return is_encrypted_; }
112
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000113 private:
114 const base::FilePath file_path_;
aviaa969482015-12-27 13:36:49 -0800115 int64_t original_size_;
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000116 base::Time last_modified_;
117 bool is_directory_;
118 bool is_unsafe_;
Daniel Ruberyad6f5862018-08-22 16:53:41 +0000119 bool is_encrypted_;
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000120 DISALLOW_COPY_AND_ASSIGN(EntryInfo);
121 };
122
123 ZipReader();
124 ~ZipReader();
125
126 // Opens the zip file specified by |zip_file_path|. Returns true on
127 // success.
128 bool Open(const base::FilePath& zip_file_path);
129
jeremysspiegela6bba372014-11-19 15:53:16 -0800130 // Opens the zip file referred to by the platform file |zip_fd|, without
131 // taking ownership of |zip_fd|. Returns true on success.
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000132 bool OpenFromPlatformFile(base::PlatformFile zip_fd);
133
134 // Opens the zip data stored in |data|. This class uses a weak reference to
135 // the given sring while extracting files, i.e. the caller should keep the
136 // string until it finishes extracting files.
137 bool OpenFromString(const std::string& data);
138
139 // Closes the currently opened zip file. This function is called in the
140 // destructor of the class, so you usually don't need to call this.
141 void Close();
142
143 // Returns true if there is at least one entry to read. This function is
144 // used to scan entries with AdvanceToNextEntry(), like:
145 //
146 // while (reader.HasMore()) {
147 // // Do something with the current file here.
148 // reader.AdvanceToNextEntry();
149 // }
150 bool HasMore();
151
152 // Advances the next entry. Returns true on success.
153 bool AdvanceToNextEntry();
154
155 // Opens the current entry in the zip file. On success, returns true and
156 // updates the the current entry state (i.e. current_entry_info() is
157 // updated). This function should be called before operations over the
158 // current entry like ExtractCurrentEntryToFile().
159 //
160 // Note that there is no CloseCurrentEntryInZip(). The the current entry
161 // state is reset automatically as needed.
162 bool OpenCurrentEntryInZip();
163
mortonmb4298b02017-08-04 07:57:41 -0700164 // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
165 // starting from the beginning of the entry. Return value specifies whether
166 // the entire file was extracted.
167 bool ExtractCurrentEntry(WriterDelegate* delegate,
168 uint64_t num_bytes_to_extract) const;
grtebc765a2015-03-18 14:22:34 -0700169
haven@chromium.org84ed2652014-01-17 00:36:28 +0000170 // Asynchronously extracts the current entry to the given output file path.
171 // If the current entry is a directory it just creates the directory
172 // synchronously instead. OpenCurrentEntryInZip() must be called beforehand.
173 // success_callback will be called on success and failure_callback will be
174 // called on failure. progress_callback will be called at least once.
175 // Callbacks will be posted to the current MessageLoop in-order.
176 void ExtractCurrentEntryToFilePathAsync(
177 const base::FilePath& output_file_path,
Sylvain Defresneddebad22019-10-01 15:40:56 +0000178 SuccessCallback success_callback,
179 FailureCallback failure_callback,
haven@chromium.org84ed2652014-01-17 00:36:28 +0000180 const ProgressCallback& progress_callback);
181
mortonmb4298b02017-08-04 07:57:41 -0700182 // Extracts the current entry into memory. If the current entry is a
183 // directory, the |output| parameter is set to the empty string. If the
184 // current entry is a file, the |output| parameter is filled with its
185 // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the
186 // |output| parameter can be filled with a big amount of data, avoid passing
187 // it around by value, but by reference or pointer. Note: the value returned
188 // by EntryInfo::original_size() cannot be trusted, so the real size of the
189 // uncompressed contents can be different. |max_read_bytes| limits the ammount
190 // of memory used to carry the entry. Returns true if the entire content is
191 // read. If the entry is bigger than |max_read_bytes|, returns false and
192 // |output| is filled with |max_read_bytes| of data. If an error occurs,
193 // returns false, and |output| is set to the empty string.
194 bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
195 std::string* output) const;
joaoe@opera.com00024292014-06-20 18:12:13 +0000196
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000197 // Returns the current entry info. Returns NULL if the current entry is
198 // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
199 EntryInfo* current_entry_info() const {
200 return current_entry_info_.get();
201 }
202
203 // Returns the number of entries in the zip file.
204 // Open() must be called beforehand.
205 int num_entries() const { return num_entries_; }
206
207 private:
208 // Common code used both in Open and OpenFromFd.
209 bool OpenInternal();
210
211 // Resets the internal state.
212 void Reset();
213
haven@chromium.org84ed2652014-01-17 00:36:28 +0000214 // Extracts a chunk of the file to the target. Will post a task for the next
215 // chunk and success/failure/progress callbacks as necessary.
rvargas@chromium.org0d737652014-02-27 05:58:13 +0000216 void ExtractChunk(base::File target_file,
Sylvain Defresneddebad22019-10-01 15:40:56 +0000217 SuccessCallback success_callback,
218 FailureCallback failure_callback,
haven@chromium.org84ed2652014-01-17 00:36:28 +0000219 const ProgressCallback& progress_callback,
aviaa969482015-12-27 13:36:49 -0800220 const int64_t offset);
haven@chromium.org84ed2652014-01-17 00:36:28 +0000221
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000222 unzFile zip_file_;
223 int num_entries_;
224 bool reached_end_;
dchenga90aed52016-04-22 16:49:07 -0700225 std::unique_ptr<EntryInfo> current_entry_info_;
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000226
Jeremy Roman26105522019-08-12 15:45:27 +0000227 base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
haven@chromium.org84ed2652014-01-17 00:36:28 +0000228
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000229 DISALLOW_COPY_AND_ASSIGN(ZipReader);
230};
231
grtebc765a2015-03-18 14:22:34 -0700232// A writer delegate that writes to a given File.
233class FileWriterDelegate : public WriterDelegate {
234 public:
Joshua Pawlickie31b5032018-02-06 20:24:51 +0000235 // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
236 // not own |file|, therefore the caller must guarantee |file| will outlive the
237 // delegate.
grtebc765a2015-03-18 14:22:34 -0700238 explicit FileWriterDelegate(base::File* file);
239
Joshua Pawlickie31b5032018-02-06 20:24:51 +0000240 // Constructs a FileWriterDelegate that takes ownership of |file|.
241 explicit FileWriterDelegate(std::unique_ptr<base::File> file);
242
grtebc765a2015-03-18 14:22:34 -0700243 // Truncates the file to the number of bytes written.
244 ~FileWriterDelegate() override;
245
246 // WriterDelegate methods:
247
248 // Seeks to the beginning of the file, returning false if the seek fails.
249 bool PrepareOutput() override;
250
251 // Writes |num_bytes| bytes of |data| to the file, returning false on error or
252 // if not all bytes could be written.
253 bool WriteBytes(const char* data, int num_bytes) override;
254
Joshua Pawlickie31b5032018-02-06 20:24:51 +0000255 // Sets the last-modified time of the data.
256 void SetTimeModified(const base::Time& time) override;
257
Daniel Ruberybb293c32019-03-29 16:42:36 +0000258 // Return the actual size of the file.
259 int64_t file_length() { return file_length_; }
260
grtebc765a2015-03-18 14:22:34 -0700261 private:
Joshua Pawlickie31b5032018-02-06 20:24:51 +0000262 // The file the delegate modifies.
grtebc765a2015-03-18 14:22:34 -0700263 base::File* file_;
Joshua Pawlickie31b5032018-02-06 20:24:51 +0000264
265 // The delegate can optionally own the file it modifies, in which case
266 // owned_file_ is set and file_ is an alias for owned_file_.
267 std::unique_ptr<base::File> owned_file_;
268
269 int64_t file_length_ = 0;
grtebc765a2015-03-18 14:22:34 -0700270
271 DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate);
272};
273
Joshua Pawlickie31b5032018-02-06 20:24:51 +0000274// A writer delegate that writes a file at a given path.
275class FilePathWriterDelegate : public WriterDelegate {
276 public:
277 explicit FilePathWriterDelegate(const base::FilePath& output_file_path);
278 ~FilePathWriterDelegate() override;
279
280 // WriterDelegate methods:
281
282 // Creates the output file and any necessary intermediate directories.
283 bool PrepareOutput() override;
284
285 // Writes |num_bytes| bytes of |data| to the file, returning false if not all
286 // bytes could be written.
287 bool WriteBytes(const char* data, int num_bytes) override;
288
289 // Sets the last-modified time of the data.
290 void SetTimeModified(const base::Time& time) override;
291
292 private:
293 base::FilePath output_file_path_;
294 base::File file_;
295
296 DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate);
297};
298
alecflett@chromium.orgd6d082e2013-05-03 23:02:57 +0000299} // namespace zip
300
301#endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_