1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4#ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5#define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6
7#include <stddef.h>
8#include <stdint.h>
9
10#include <memory>
11#include <string>
12
13#include "base/callback.h"
14#include "base/files/file.h"
15#include "base/files/file_path.h"
16#include "base/files/file_util.h"
17#include "base/macros.h"
18#include "base/memory/weak_ptr.h"
19#include "base/time/time.h"
20
21#if defined(USE_SYSTEM_MINIZIP)
22#include <minizip/unzip.h>
23#else
24#include "third_party/zlib/contrib/minizip/unzip.h"
25#endif
26
27namespace zip {
28
29// A delegate interface used to stream out an entry; see
30// ZipReader::ExtractCurrentEntry.
31class WriterDelegate {
32 public:
33  virtual ~WriterDelegate() {}
34
35  // Invoked once before any data is streamed out to pave the way (e.g., to open
36  // the output file). Return false on failure to cancel extraction.
37  virtual bool PrepareOutput() = 0;
38
39  // Invoked to write the next chunk of data. Return false on failure to cancel
40  // extraction.
41  virtual bool WriteBytes(const char* data, int num_bytes) = 0;
42
43  // Sets the last-modified time of the data.
44  virtual void SetTimeModified(const base::Time& time) = 0;
45};
46
47// This class is used for reading zip files. A typical use case of this
48// class is to scan entries in a zip file and extract them. The code will
49// look like:
50//
51//   ZipReader reader;
52//   reader.Open(zip_file_path);
53//   while (reader.HasMore()) {
54//     reader.OpenCurrentEntryInZip();
55//     const base::FilePath& entry_path =
56//        reader.current_entry_info()->file_path();
57//     auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path);
58//     reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max());
59//     reader.AdvanceToNextEntry();
60//   }
61//
62// For simplicity, error checking is omitted in the example code above. The
63// production code should check return values from all of these functions.
64//
65class ZipReader {
66 public:
67  // A callback that is called when the operation is successful.
68  using SuccessCallback = base::OnceClosure;
69  // A callback that is called when the operation fails.
70  using FailureCallback = base::OnceClosure;
71  // A callback that is called periodically during the operation with the number
72  // of bytes that have been processed so far.
73  using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
74
75  // This class represents information of an entry (file or directory) in
76  // a zip file.
77  class EntryInfo {
78   public:
79    EntryInfo(const std::string& filename_in_zip,
80              const unz_file_info& raw_file_info);
81
82    // Returns the file path. The path is usually relative like
83    // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
84    const base::FilePath& file_path() const { return file_path_; }
85
86    // Returns the size of the original file (i.e. after uncompressed).
87    // Returns 0 if the entry is a directory.
88    // Note: this value should not be trusted, because it is stored as metadata
89    // in the zip archive and can be different from the real uncompressed size.
90    int64_t original_size() const { return original_size_; }
91
92    // Returns the last modified time. If the time stored in the zip file was
93    // not valid, the unix epoch will be returned.
94    //
95    // The time stored in the zip archive uses the MS-DOS date and time format.
96    // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
97    // As such the following limitations apply:
98    // * only years from 1980 to 2107 can be represented.
99    // * the time stamp has a 2 second resolution.
100    // * there's no timezone information, so the time is interpreted as local.
101    base::Time last_modified() const { return last_modified_; }
102
103    // Returns true if the entry is a directory.
104    bool is_directory() const { return is_directory_; }
105
106    // Returns true if the entry is unsafe, like having ".." or invalid
107    // UTF-8 characters in its file name, or the file path is absolute.
108    bool is_unsafe() const { return is_unsafe_; }
109
110    // Returns true if the entry is encrypted.
111    bool is_encrypted() const { return is_encrypted_; }
112
113   private:
114    const base::FilePath file_path_;
115    int64_t original_size_;
116    base::Time last_modified_;
117    bool is_directory_;
118    bool is_unsafe_;
119    bool is_encrypted_;
120    DISALLOW_COPY_AND_ASSIGN(EntryInfo);
121  };
122
123  ZipReader();
124  ~ZipReader();
125
126  // Opens the zip file specified by |zip_file_path|. Returns true on
127  // success.
128  bool Open(const base::FilePath& zip_file_path);
129
130  // Opens the zip file referred to by the platform file |zip_fd|, without
131  // taking ownership of |zip_fd|. Returns true on success.
132  bool OpenFromPlatformFile(base::PlatformFile zip_fd);
133
134  // Opens the zip data stored in |data|. This class uses a weak reference to
135  // the given sring while extracting files, i.e. the caller should keep the
136  // string until it finishes extracting files.
137  bool OpenFromString(const std::string& data);
138
139  // Closes the currently opened zip file. This function is called in the
140  // destructor of the class, so you usually don't need to call this.
141  void Close();
142
143  // Returns true if there is at least one entry to read. This function is
144  // used to scan entries with AdvanceToNextEntry(), like:
145  //
146  // while (reader.HasMore()) {
147  //   // Do something with the current file here.
148  //   reader.AdvanceToNextEntry();
149  // }
150  bool HasMore();
151
152  // Advances the next entry. Returns true on success.
153  bool AdvanceToNextEntry();
154
155  // Opens the current entry in the zip file. On success, returns true and
156  // updates the the current entry state (i.e. current_entry_info() is
157  // updated). This function should be called before operations over the
158  // current entry like ExtractCurrentEntryToFile().
159  //
160  // Note that there is no CloseCurrentEntryInZip(). The the current entry
161  // state is reset automatically as needed.
162  bool OpenCurrentEntryInZip();
163
164  // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
165  // starting from the beginning of the entry. Return value specifies whether
166  // the entire file was extracted.
167  bool ExtractCurrentEntry(WriterDelegate* delegate,
168                           uint64_t num_bytes_to_extract) const;
169
170  // Asynchronously extracts the current entry to the given output file path.
171  // If the current entry is a directory it just creates the directory
172  // synchronously instead.  OpenCurrentEntryInZip() must be called beforehand.
173  // success_callback will be called on success and failure_callback will be
174  // called on failure.  progress_callback will be called at least once.
175  // Callbacks will be posted to the current MessageLoop in-order.
176  void ExtractCurrentEntryToFilePathAsync(
177      const base::FilePath& output_file_path,
178      SuccessCallback success_callback,
179      FailureCallback failure_callback,
180      const ProgressCallback& progress_callback);
181
182  // Extracts the current entry into memory. If the current entry is a
183  // directory, the |output| parameter is set to the empty string. If the
184  // current entry is a file, the |output| parameter is filled with its
185  // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the
186  // |output| parameter can be filled with a big amount of data, avoid passing
187  // it around by value, but by reference or pointer. Note: the value returned
188  // by EntryInfo::original_size() cannot be trusted, so the real size of the
189  // uncompressed contents can be different. |max_read_bytes| limits the ammount
190  // of memory used to carry the entry. Returns true if the entire content is
191  // read. If the entry is bigger than |max_read_bytes|, returns false and
192  // |output| is filled with |max_read_bytes| of data. If an error occurs,
193  // returns false, and |output| is set to the empty string.
194  bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
195                                   std::string* output) const;
196
197  // Returns the current entry info. Returns NULL if the current entry is
198  // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
199  EntryInfo* current_entry_info() const {
200    return current_entry_info_.get();
201  }
202
203  // Returns the number of entries in the zip file.
204  // Open() must be called beforehand.
205  int num_entries() const { return num_entries_; }
206
207 private:
208  // Common code used both in Open and OpenFromFd.
209  bool OpenInternal();
210
211  // Resets the internal state.
212  void Reset();
213
214  // Extracts a chunk of the file to the target.  Will post a task for the next
215  // chunk and success/failure/progress callbacks as necessary.
216  void ExtractChunk(base::File target_file,
217                    SuccessCallback success_callback,
218                    FailureCallback failure_callback,
219                    const ProgressCallback& progress_callback,
220                    const int64_t offset);
221
222  unzFile zip_file_;
223  int num_entries_;
224  bool reached_end_;
225  std::unique_ptr<EntryInfo> current_entry_info_;
226
227  base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
228
229  DISALLOW_COPY_AND_ASSIGN(ZipReader);
230};
231
232// A writer delegate that writes to a given File.
233class FileWriterDelegate : public WriterDelegate {
234 public:
235  // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
236  // not own |file|, therefore the caller must guarantee |file| will outlive the
237  // delegate.
238  explicit FileWriterDelegate(base::File* file);
239
240  // Constructs a FileWriterDelegate that takes ownership of |file|.
241  explicit FileWriterDelegate(std::unique_ptr<base::File> file);
242
243  // Truncates the file to the number of bytes written.
244  ~FileWriterDelegate() override;
245
246  // WriterDelegate methods:
247
248  // Seeks to the beginning of the file, returning false if the seek fails.
249  bool PrepareOutput() override;
250
251  // Writes |num_bytes| bytes of |data| to the file, returning false on error or
252  // if not all bytes could be written.
253  bool WriteBytes(const char* data, int num_bytes) override;
254
255  // Sets the last-modified time of the data.
256  void SetTimeModified(const base::Time& time) override;
257
258  // Return the actual size of the file.
259  int64_t file_length() { return file_length_; }
260
261 private:
262  // The file the delegate modifies.
263  base::File* file_;
264
265  // The delegate can optionally own the file it modifies, in which case
266  // owned_file_ is set and file_ is an alias for owned_file_.
267  std::unique_ptr<base::File> owned_file_;
268
269  int64_t file_length_ = 0;
270
271  DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate);
272};
273
274// A writer delegate that writes a file at a given path.
275class FilePathWriterDelegate : public WriterDelegate {
276 public:
277  explicit FilePathWriterDelegate(const base::FilePath& output_file_path);
278  ~FilePathWriterDelegate() override;
279
280  // WriterDelegate methods:
281
282  // Creates the output file and any necessary intermediate directories.
283  bool PrepareOutput() override;
284
285  // Writes |num_bytes| bytes of |data| to the file, returning false if not all
286  // bytes could be written.
287  bool WriteBytes(const char* data, int num_bytes) override;
288
289  // Sets the last-modified time of the data.
290  void SetTimeModified(const base::Time& time) override;
291
292 private:
293  base::FilePath output_file_path_;
294  base::File file_;
295
296  DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate);
297};
298
299}  // namespace zip
300
301#endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
302