Merge "libziparchive: Add ability to backup in ZipWriter" am: 0e19795a62
am: a41597ba0e

Change-Id: Iaab0c1908c7a936462ade0157de9e21434dbbce3
diff --git a/include/ziparchive/zip_writer.h b/include/ziparchive/zip_writer.h
index 0b6ede4..41ca2e1 100644
--- a/include/ziparchive/zip_writer.h
+++ b/include/ziparchive/zip_writer.h
@@ -17,15 +17,16 @@
 #ifndef LIBZIPARCHIVE_ZIPWRITER_H_
 #define LIBZIPARCHIVE_ZIPWRITER_H_
 
-#include "android-base/macros.h"
-#include <utils/Compat.h>
-
 #include <cstdio>
 #include <ctime>
+#include <zlib.h>
+
 #include <memory>
 #include <string>
 #include <vector>
-#include <zlib.h>
+
+#include "android-base/macros.h"
+#include "utils/Compat.h"
 
 /**
  * Writes a Zip file via a stateful interface.
@@ -63,6 +64,20 @@
     kAlign32 = 0x02,
   };
 
+  /**
+   * A struct representing a zip file entry.
+   */
+  struct FileEntry {
+    std::string path;
+    uint16_t compression_method;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t last_mod_time;
+    uint16_t last_mod_date;
+    uint32_t local_file_header_offset;
+  };
+
   static const char* ErrorCodeString(int32_t error_code);
 
   /**
@@ -122,6 +137,19 @@
   int32_t FinishEntry();
 
   /**
+   * Discards the last-written entry. Can only be called after an entry has been written using
+   * FinishEntry().
+   * Returns 0 on success, and an error value < 0 on failure.
+   */
+  int32_t DiscardLastEntry();
+
+  /**
+   * Sets `out_entry` to the last entry written after a call to FinishEntry().
+   * Returns 0 on success, and an error value < 0 if no entries have been written.
+   */
+  int32_t GetLastEntry(FileEntry* out_entry);
+
+  /**
    * Writes the Central Directory Headers and flushes the zip file stream.
    * Returns 0 on success, and an error value < 0 on failure.
    */
@@ -130,22 +158,11 @@
 private:
   DISALLOW_COPY_AND_ASSIGN(ZipWriter);
 
-  struct FileInfo {
-    std::string path;
-    uint16_t compression_method;
-    uint32_t crc32;
-    uint32_t compressed_size;
-    uint32_t uncompressed_size;
-    uint16_t last_mod_time;
-    uint16_t last_mod_date;
-    uint32_t local_file_header_offset;
-  };
-
   int32_t HandleError(int32_t error_code);
   int32_t PrepareDeflate();
-  int32_t StoreBytes(FileInfo* file, const void* data, size_t len);
-  int32_t CompressBytes(FileInfo* file, const void* data, size_t len);
-  int32_t FlushCompressedBytes(FileInfo* file);
+  int32_t StoreBytes(FileEntry* file, const void* data, size_t len);
+  int32_t CompressBytes(FileEntry* file, const void* data, size_t len);
+  int32_t FlushCompressedBytes(FileEntry* file);
 
   enum class State {
     kWritingZip,
@@ -157,7 +174,8 @@
   FILE* file_;
   off64_t current_offset_;
   State state_;
-  std::vector<FileInfo> files_;
+  std::vector<FileEntry> files_;
+  FileEntry current_file_entry_;
 
   std::unique_ptr<z_stream, void(*)(z_stream*)> z_stream_;
   std::vector<uint8_t> buffer_;
diff --git a/libziparchive/zip_writer.cc b/libziparchive/zip_writer.cc
index b72ed7f..ddd4dd5 100644
--- a/libziparchive/zip_writer.cc
+++ b/libziparchive/zip_writer.cc
@@ -14,21 +14,22 @@
  * limitations under the License.
  */
 
-#include "entry_name_utils-inl.h"
-#include "zip_archive_common.h"
 #include "ziparchive/zip_writer.h"
 
-#include <utils/Log.h>
-
-#include <sys/param.h>
-
-#include <cassert>
 #include <cstdio>
-#include <memory>
-#include <vector>
+#include <sys/param.h>
 #include <zlib.h>
 #define DEF_MEM_LEVEL 8                // normally in zutil.h?
 
+#include <memory>
+#include <vector>
+
+#include "android-base/logging.h"
+#include "utils/Log.h"
+
+#include "entry_name_utils-inl.h"
+#include "zip_archive_common.h"
+
 #if !defined(powerof2)
 #define powerof2(x) ((((x)-1)&(x))==0)
 #endif
@@ -171,12 +172,12 @@
     return kInvalidAlignment;
   }
 
-  FileInfo fileInfo = {};
-  fileInfo.path = std::string(path);
-  fileInfo.local_file_header_offset = current_offset_;
+  current_file_entry_ = {};
+  current_file_entry_.path = path;
+  current_file_entry_.local_file_header_offset = current_offset_;
 
-  if (!IsValidEntryName(reinterpret_cast<const uint8_t*>(fileInfo.path.data()),
-                       fileInfo.path.size())) {
+  if (!IsValidEntryName(reinterpret_cast<const uint8_t*>(current_file_entry_.path.data()),
+                        current_file_entry_.path.size())) {
     return kInvalidEntryName;
   }
 
@@ -188,24 +189,24 @@
   header.gpb_flags |= kGPBDDFlagMask;
 
   if (flags & ZipWriter::kCompress) {
-    fileInfo.compression_method = kCompressDeflated;
+    current_file_entry_.compression_method = kCompressDeflated;
 
     int32_t result = PrepareDeflate();
     if (result != kNoError) {
       return result;
     }
   } else {
-    fileInfo.compression_method = kCompressStored;
+    current_file_entry_.compression_method = kCompressStored;
   }
-  header.compression_method = fileInfo.compression_method;
+  header.compression_method = current_file_entry_.compression_method;
 
-  ExtractTimeAndDate(time, &fileInfo.last_mod_time, &fileInfo.last_mod_date);
-  header.last_mod_time = fileInfo.last_mod_time;
-  header.last_mod_date = fileInfo.last_mod_date;
+  ExtractTimeAndDate(time, &current_file_entry_.last_mod_time, &current_file_entry_.last_mod_date);
+  header.last_mod_time = current_file_entry_.last_mod_time;
+  header.last_mod_date = current_file_entry_.last_mod_date;
 
-  header.file_name_length = fileInfo.path.size();
+  header.file_name_length = current_file_entry_.path.size();
 
-  off64_t offset = current_offset_ + sizeof(header) + fileInfo.path.size();
+  off64_t offset = current_offset_ + sizeof(header) + current_file_entry_.path.size();
   std::vector<char> zero_padding;
   if (alignment != 0 && (offset & (alignment - 1))) {
     // Pad the extra field so the data will be aligned.
@@ -220,7 +221,8 @@
     return HandleError(kIoError);
   }
 
-  if (fwrite(path, sizeof(*path), fileInfo.path.size(), file_) != fileInfo.path.size()) {
+  if (fwrite(path, sizeof(*path), current_file_entry_.path.size(), file_)
+      != current_file_entry_.path.size()) {
     return HandleError(kIoError);
   }
 
@@ -230,15 +232,37 @@
     return HandleError(kIoError);
   }
 
-  files_.emplace_back(std::move(fileInfo));
-
   current_offset_ = offset;
   state_ = State::kWritingEntry;
   return kNoError;
 }
 
+int32_t ZipWriter::DiscardLastEntry() {
+  if (state_ != State::kWritingZip || files_.empty()) {
+    return kInvalidState;
+  }
+
+  FileEntry& last_entry = files_.back();
+  current_offset_ = last_entry.local_file_header_offset;
+  if (fseeko(file_, current_offset_, SEEK_SET) != 0) {
+    return HandleError(kIoError);
+  }
+  files_.pop_back();
+  return kNoError;
+}
+
+int32_t ZipWriter::GetLastEntry(FileEntry* out_entry) {
+  CHECK(out_entry != nullptr);
+
+  if (files_.empty()) {
+    return kInvalidState;
+  }
+  *out_entry = files_.back();
+  return kNoError;
+}
+
 int32_t ZipWriter::PrepareDeflate() {
-  assert(state_ == State::kWritingZip);
+  CHECK(state_ == State::kWritingZip);
 
   // Initialize the z_stream for compression.
   z_stream_ = std::unique_ptr<z_stream, void(*)(z_stream*)>(new z_stream(), DeleteZStream);
@@ -269,25 +293,25 @@
     return HandleError(kInvalidState);
   }
 
-  FileInfo& currentFile = files_.back();
   int32_t result = kNoError;
-  if (currentFile.compression_method & kCompressDeflated) {
-    result = CompressBytes(&currentFile, data, len);
+  if (current_file_entry_.compression_method & kCompressDeflated) {
+    result = CompressBytes(&current_file_entry_, data, len);
   } else {
-    result = StoreBytes(&currentFile, data, len);
+    result = StoreBytes(&current_file_entry_, data, len);
   }
 
   if (result != kNoError) {
     return result;
   }
 
-  currentFile.crc32 = crc32(currentFile.crc32, reinterpret_cast<const Bytef*>(data), len);
-  currentFile.uncompressed_size += len;
+  current_file_entry_.crc32 = crc32(current_file_entry_.crc32,
+                                    reinterpret_cast<const Bytef*>(data), len);
+  current_file_entry_.uncompressed_size += len;
   return kNoError;
 }
 
-int32_t ZipWriter::StoreBytes(FileInfo* file, const void* data, size_t len) {
-  assert(state_ == State::kWritingEntry);
+int32_t ZipWriter::StoreBytes(FileEntry* file, const void* data, size_t len) {
+  CHECK(state_ == State::kWritingEntry);
 
   if (fwrite(data, 1, len, file_) != len) {
     return HandleError(kIoError);
@@ -297,11 +321,11 @@
   return kNoError;
 }
 
-int32_t ZipWriter::CompressBytes(FileInfo* file, const void* data, size_t len) {
-  assert(state_ == State::kWritingEntry);
-  assert(z_stream_);
-  assert(z_stream_->next_out != nullptr);
-  assert(z_stream_->avail_out != 0);
+int32_t ZipWriter::CompressBytes(FileEntry* file, const void* data, size_t len) {
+  CHECK(state_ == State::kWritingEntry);
+  CHECK(z_stream_);
+  CHECK(z_stream_->next_out != nullptr);
+  CHECK(z_stream_->avail_out != 0);
 
   // Prepare the input.
   z_stream_->next_in = reinterpret_cast<const uint8_t*>(data);
@@ -331,17 +355,17 @@
   return kNoError;
 }
 
-int32_t ZipWriter::FlushCompressedBytes(FileInfo* file) {
-  assert(state_ == State::kWritingEntry);
-  assert(z_stream_);
-  assert(z_stream_->next_out != nullptr);
-  assert(z_stream_->avail_out != 0);
+int32_t ZipWriter::FlushCompressedBytes(FileEntry* file) {
+  CHECK(state_ == State::kWritingEntry);
+  CHECK(z_stream_);
+  CHECK(z_stream_->next_out != nullptr);
+  CHECK(z_stream_->avail_out != 0);
 
   // Keep deflating while there isn't enough space in the buffer to
   // to complete the compress.
   int zerr;
   while ((zerr = deflate(z_stream_.get(), Z_FINISH)) == Z_OK) {
-    assert(z_stream_->avail_out == 0);
+    CHECK(z_stream_->avail_out == 0);
     size_t write_bytes = z_stream_->next_out - buffer_.data();
     if (fwrite(buffer_.data(), 1, write_bytes, file_) != write_bytes) {
       return HandleError(kIoError);
@@ -373,9 +397,8 @@
     return kInvalidState;
   }
 
-  FileInfo& currentFile = files_.back();
-  if (currentFile.compression_method & kCompressDeflated) {
-    int32_t result = FlushCompressedBytes(&currentFile);
+  if (current_file_entry_.compression_method & kCompressDeflated) {
+    int32_t result = FlushCompressedBytes(&current_file_entry_);
     if (result != kNoError) {
       return result;
     }
@@ -388,13 +411,15 @@
   }
 
   DataDescriptor dd = {};
-  dd.crc32 = currentFile.crc32;
-  dd.compressed_size = currentFile.compressed_size;
-  dd.uncompressed_size = currentFile.uncompressed_size;
+  dd.crc32 = current_file_entry_.crc32;
+  dd.compressed_size = current_file_entry_.compressed_size;
+  dd.uncompressed_size = current_file_entry_.uncompressed_size;
   if (fwrite(&dd, sizeof(dd), 1, file_) != 1) {
     return HandleError(kIoError);
   }
 
+  files_.emplace_back(std::move(current_file_entry_));
+
   current_offset_ += sizeof(DataDescriptor::kOptSignature) + sizeof(dd);
   state_ = State::kWritingZip;
   return kNoError;
@@ -406,7 +431,7 @@
   }
 
   off64_t startOfCdr = current_offset_;
-  for (FileInfo& file : files_) {
+  for (FileEntry& file : files_) {
     CentralDirectoryRecord cdr = {};
     cdr.record_signature = CentralDirectoryRecord::kSignature;
     cdr.gpb_flags |= kGPBDDFlagMask;
@@ -442,11 +467,19 @@
     return HandleError(kIoError);
   }
 
+  current_offset_ += sizeof(er);
+
+  // Since we can BackUp() and potentially finish writing at an offset less than one we had
+  // already written at, we must truncate the file.
+
+  if (ftruncate64(fileno(file_), current_offset_) != 0) {
+    return HandleError(kIoError);
+  }
+
   if (fflush(file_) != 0) {
     return HandleError(kIoError);
   }
 
-  current_offset_ += sizeof(er);
   state_ = State::kDone;
   return kNoError;
 }
diff --git a/libziparchive/zip_writer_test.cc b/libziparchive/zip_writer_test.cc
index 16a574d..259bcff 100644
--- a/libziparchive/zip_writer_test.cc
+++ b/libziparchive/zip_writer_test.cc
@@ -23,6 +23,10 @@
 #include <memory>
 #include <vector>
 
+static ::testing::AssertionResult AssertFileEntryContentsEq(const std::string& expected,
+                                                            ZipArchiveHandle handle,
+                                                            ZipEntry* zip_entry);
+
 struct zipwriter : public ::testing::Test {
   TemporaryFile* temp_file_;
   int fd_;
@@ -59,16 +63,10 @@
 
   ZipEntry data;
   ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
-  EXPECT_EQ(strlen(expected), data.compressed_length);
-  EXPECT_EQ(strlen(expected), data.uncompressed_length);
   EXPECT_EQ(kCompressStored, data.method);
-
-  char buffer[6];
-  EXPECT_EQ(0,
-            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(&buffer), sizeof(buffer)));
-  buffer[5] = 0;
-
-  EXPECT_STREQ(expected, buffer);
+  EXPECT_EQ(strlen(expected), data.compressed_length);
+  ASSERT_EQ(strlen(expected), data.uncompressed_length);
+  ASSERT_TRUE(AssertFileEntryContentsEq(expected, handle, &data));
 
   CloseArchive(handle);
 }
@@ -94,26 +92,19 @@
   ZipArchiveHandle handle;
   ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
 
-  char buffer[4];
   ZipEntry data;
 
   ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
   EXPECT_EQ(kCompressStored, data.method);
   EXPECT_EQ(2u, data.compressed_length);
-  EXPECT_EQ(2u, data.uncompressed_length);
-  ASSERT_EQ(0,
-            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
-  buffer[2] = 0;
-  EXPECT_STREQ("he", buffer);
+  ASSERT_EQ(2u, data.uncompressed_length);
+  ASSERT_TRUE(AssertFileEntryContentsEq("he", handle, &data));
 
   ASSERT_EQ(0, FindEntry(handle, ZipString("file/file.txt"), &data));
   EXPECT_EQ(kCompressStored, data.method);
   EXPECT_EQ(3u, data.compressed_length);
-  EXPECT_EQ(3u, data.uncompressed_length);
-  ASSERT_EQ(0,
-            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
-  buffer[3] = 0;
-  EXPECT_STREQ("llo", buffer);
+  ASSERT_EQ(3u, data.uncompressed_length);
+  ASSERT_TRUE(AssertFileEntryContentsEq("llo", handle, &data));
 
   ASSERT_EQ(0, FindEntry(handle, ZipString("file/file2.txt"), &data));
   EXPECT_EQ(kCompressStored, data.method);
@@ -143,7 +134,7 @@
   CloseArchive(handle);
 }
 
-void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) {
+static void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) {
   memset(tm, 0, sizeof(struct tm));
   tm->tm_hour = (zip_time >> 11) & 0x1f;
   tm->tm_min = (zip_time >> 5) & 0x3f;
@@ -264,14 +255,8 @@
   ZipEntry data;
   ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
   EXPECT_EQ(kCompressDeflated, data.method);
-  EXPECT_EQ(4u, data.uncompressed_length);
-
-  char buffer[5];
-  ASSERT_EQ(0,
-            ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
-  buffer[4] = 0;
-
-  EXPECT_STREQ("helo", buffer);
+  ASSERT_EQ(4u, data.uncompressed_length);
+  ASSERT_TRUE(AssertFileEntryContentsEq("helo", handle, &data));
 
   CloseArchive(handle);
 }
@@ -319,3 +304,111 @@
   ASSERT_EQ(-5, writer.StartAlignedEntry("align.txt", ZipWriter::kAlign32, 4096));
   ASSERT_EQ(-6, writer.StartAlignedEntry("align.txt", 0, 3));
 }
+
+TEST_F(zipwriter, BackupRemovesTheLastFile) {
+  ZipWriter writer(file_);
+
+  const char* kKeepThis = "keep this";
+  const char* kDropThis = "drop this";
+  const char* kReplaceWithThis = "replace with this";
+
+  ZipWriter::FileEntry entry;
+  EXPECT_LT(writer.GetLastEntry(&entry), 0);
+
+  ASSERT_EQ(0, writer.StartEntry("keep.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes(kKeepThis, strlen(kKeepThis)));
+  ASSERT_EQ(0, writer.FinishEntry());
+
+  ASSERT_EQ(0, writer.GetLastEntry(&entry));
+  EXPECT_EQ("keep.txt", entry.path);
+
+  ASSERT_EQ(0, writer.StartEntry("drop.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes(kDropThis, strlen(kDropThis)));
+  ASSERT_EQ(0, writer.FinishEntry());
+
+  ASSERT_EQ(0, writer.GetLastEntry(&entry));
+  EXPECT_EQ("drop.txt", entry.path);
+
+  ASSERT_EQ(0, writer.DiscardLastEntry());
+
+  ASSERT_EQ(0, writer.GetLastEntry(&entry));
+  EXPECT_EQ("keep.txt", entry.path);
+
+  ASSERT_EQ(0, writer.StartEntry("replace.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes(kReplaceWithThis, strlen(kReplaceWithThis)));
+  ASSERT_EQ(0, writer.FinishEntry());
+
+  ASSERT_EQ(0, writer.GetLastEntry(&entry));
+  EXPECT_EQ("replace.txt", entry.path);
+
+  ASSERT_EQ(0, writer.Finish());
+
+  // Verify that "drop.txt" does not exist.
+
+  ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
+
+  ZipArchiveHandle handle;
+  ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
+
+  ZipEntry data;
+  ASSERT_EQ(0, FindEntry(handle, ZipString("keep.txt"), &data));
+  ASSERT_TRUE(AssertFileEntryContentsEq(kKeepThis, handle, &data));
+
+  ASSERT_NE(0, FindEntry(handle, ZipString("drop.txt"), &data));
+
+  ASSERT_EQ(0, FindEntry(handle, ZipString("replace.txt"), &data));
+  ASSERT_TRUE(AssertFileEntryContentsEq(kReplaceWithThis, handle, &data));
+
+  CloseArchive(handle);
+}
+
+TEST_F(zipwriter, TruncateFileAfterBackup) {
+  ZipWriter writer(file_);
+
+  const char* kSmall = "small";
+
+  ASSERT_EQ(0, writer.StartEntry("small.txt", 0));
+  ASSERT_EQ(0, writer.WriteBytes(kSmall, strlen(kSmall)));
+  ASSERT_EQ(0, writer.FinishEntry());
+
+  ASSERT_EQ(0, writer.StartEntry("large.txt", 0));
+  std::vector<uint8_t> data;
+  data.resize(1024*1024, 0xef);
+  ASSERT_EQ(0, writer.WriteBytes(data.data(), data.size()));
+  ASSERT_EQ(0, writer.FinishEntry());
+
+  off64_t before_len = ftello64(file_);
+
+  ZipWriter::FileEntry entry;
+  ASSERT_EQ(0, writer.GetLastEntry(&entry));
+  ASSERT_EQ(0, writer.DiscardLastEntry());
+
+  ASSERT_EQ(0, writer.Finish());
+
+  off64_t after_len = ftello64(file_);
+
+  ASSERT_GT(before_len, after_len);
+}
+
+static ::testing::AssertionResult AssertFileEntryContentsEq(const std::string& expected,
+                                                            ZipArchiveHandle handle,
+                                                            ZipEntry* zip_entry) {
+  if (expected.size() != zip_entry->uncompressed_length) {
+    return ::testing::AssertionFailure() << "uncompressed entry size "
+        << zip_entry->uncompressed_length << " does not match expected size " << expected.size();
+  }
+
+  std::string actual;
+  actual.resize(expected.size());
+
+  uint8_t* buffer = reinterpret_cast<uint8_t*>(&*actual.begin());
+  if (ExtractToMemory(handle, zip_entry, buffer, actual.size()) != 0) {
+    return ::testing::AssertionFailure() << "failed to extract entry";
+  }
+
+  if (expected != actual) {
+    return ::testing::AssertionFailure() << "actual zip_entry data '" << actual
+        << "' does not match expected '" << expected << "'";
+  }
+  return ::testing::AssertionSuccess();
+}