Reimplement ZipFileRO in terms of libziparchive. This lets us share zip archive processing code with both the runtime (ART, Dalvik) and critical Java code (StrictJarFile). This change also moves several utility methods to ZipUtils and deduplicates code across several zip inflation methods. One side effect of this change is that several processing loops are now O(n) instead of O(n^2). bug: 10193060 Change-Id: I3c7188496837a47246c4f342e45485a70fef3169

commit: afd31e08299008fdc5c2813f21b2573f29dc53df [log] [tgz]
author: Narayan Kamath <narayan@google.com> Tue Dec 03 13:16:03 2013 +0000
committer: Narayan Kamath <narayan@google.com> Mon Dec 09 16:23:16 2013 +0000
tree: 7f83dd8f1e7ed71d4fe8ce280841e43ce275ac6d
parent: 6e2d0c1d91f644ab50e0c0b7cae4306262a4ca41 [diff] [blame]
diff --git a/include/androidfw/ZipFileRO.h b/include/androidfw/ZipFileRO.h
index 547e36a..ad5be12 100644
--- a/include/androidfw/ZipFileRO.h
+++ b/include/androidfw/ZipFileRO.h

@@ -40,6 +40,8 @@
 #include <unistd.h>
 #include <time.h>
 
+typedef void* ZipArchiveHandle;
+
 namespace android {
 
 /*
@@ -51,18 +53,13 @@
 /*
  * Open a Zip archive for reading.
  *
- * We want "open" and "find entry by name" to be fast operations, and we
- * want to use as little memory as possible.  We memory-map the file,
- * and load a hash table with pointers to the filenames (which aren't
- * null-terminated).  The other fields are at a fixed offset from the
- * filename, so we don't need to extract those (but we do need to byte-read
- * and endian-swap them every time we want them).
+ * Implemented as a thin wrapper over system/core/libziparchive.
  *
- * To speed comparisons when doing a lookup by name, we could make the mapping
- * "private" (copy-on-write) and null-terminate the filenames after verifying
- * the record structure.  However, this requires a private mapping of
- * every page that the Central Directory touches.  Easier to tuck a copy
- * of the string length into the hash table entry.
+ * "open" and "find entry by name" are fast operations and use as little
+ * memory as possible.
+ *
+ * We also support fast iteration over all entries in the file (with a
+ * stable, but unspecified iteration order).
  *
  * NOTE: If this is used on file descriptors inherited from a fork() operation,
  * you must be on a platform that implements pread() to guarantee correctness
@@ -70,48 +67,44 @@
  */
 class ZipFileRO {
 public:
-    ZipFileRO()
-        : mFd(-1), mFileName(NULL), mFileLength(-1),
-          mDirectoryMap(NULL),
-          mNumEntries(-1), mDirectoryOffset(-1),
-          mHashTableSize(-1), mHashTable(NULL)
-        {}
-
-    ~ZipFileRO();
+    /* Zip compression methods we support */
+    enum {
+        kCompressStored     = 0,        // no compression
+        kCompressDeflated   = 8,        // standard deflate
+    };
 
     /*
      * Open an archive.
      */
-    status_t open(const char* zipFileName);
+    static ZipFileRO* open(const char* zipFileName);
 
     /*
      * Find an entry, by name.  Returns the entry identifier, or NULL if
      * not found.
-     *
-     * If two entries have the same name, one will be chosen at semi-random.
      */
-    ZipEntryRO findEntryByName(const char* fileName) const;
+    ZipEntryRO findEntryByName(const char* entryName) const;
+
+
+    /*
+     * Start iterating over the list of entries in the zip file. Requires
+     * a matching call to endIteration with the same cookie.
+     */
+    bool startIteration(void** cookie);
+
+    /**
+     * Return the next entry in iteration order, or NULL if there are no more
+     * entries in this archive.
+     */
+    ZipEntryRO nextEntry(void* cookie);
+
+    void endIteration(void* cookie);
+
+    void releaseEntry(ZipEntryRO entry) const;
 
     /*
      * Return the #of entries in the Zip archive.
      */
-    int getNumEntries(void) const {
-        return mNumEntries;
-    }
-
-    /*
-     * Return the Nth entry.  Zip file entries are not stored in sorted
-     * order, and updated entries may appear at the end, so anyone walking
-     * the archive needs to avoid making ordering assumptions.  We take
-     * that further by returning the Nth non-empty entry in the hash table
-     * rather than the Nth entry in the archive.
-     *
-     * Valid values are [0..numEntries).
-     *
-     * [This is currently O(n).  If it needs to be fast we can allocate an
-     * additional data structure or provide an iterator interface.]
-     */
-    ZipEntryRO findEntryByIndex(int idx) const;
+    int getNumEntries();
 
     /*
      * Copy the filename into the supplied buffer.  Returns 0 on success,
@@ -149,112 +142,27 @@
      *
      * Returns "true" on success.
      */
-    bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
+    bool uncompressEntry(ZipEntryRO entry, void* buffer, size_t size) const;
 
     /*
      * Uncompress the data to an open file descriptor.
      */
     bool uncompressEntry(ZipEntryRO entry, int fd) const;
 
-    /* Zip compression methods we support */
-    enum {
-        kCompressStored     = 0,        // no compression
-        kCompressDeflated   = 8,        // standard deflate
-    };
-
-    /*
-     * Utility function: uncompress deflated data, buffer to buffer.
-     */
-    static bool inflateBuffer(void* outBuf, const void* inBuf,
-        size_t uncompLen, size_t compLen);
-
-    /*
-     * Utility function: uncompress deflated data, buffer to fd.
-     */
-    static bool inflateBuffer(int fd, const void* inBuf,
-        size_t uncompLen, size_t compLen);
-
-    /*
-     * Utility function to convert ZIP's time format to a timespec struct.
-     */
-    static inline void zipTimeToTimespec(long when, struct tm* timespec) {
-        const long date = when >> 16;
-        timespec->tm_year = ((date >> 9) & 0x7F) + 80; // Zip is years since 1980
-        timespec->tm_mon = (date >> 5) & 0x0F;
-        timespec->tm_mday = date & 0x1F;
-
-        timespec->tm_hour = (when >> 11) & 0x1F;
-        timespec->tm_min = (when >> 5) & 0x3F;
-        timespec->tm_sec = (when & 0x1F) << 1;
-    }
-
-    /*
-     * Some basic functions for raw data manipulation.  "LE" means
-     * Little Endian.
-     */
-    static inline unsigned short get2LE(const unsigned char* buf) {
-        return buf[0] | (buf[1] << 8);
-    }
-    static inline unsigned long get4LE(const unsigned char* buf) {
-        return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
-    }
+    ~ZipFileRO();
 
 private:
-    /* these are private and not defined */ 
+    /* these are private and not defined */
     ZipFileRO(const ZipFileRO& src);
     ZipFileRO& operator=(const ZipFileRO& src);
 
-    /* locate and parse the central directory */
-    bool mapCentralDirectory(void);
+    ZipFileRO(ZipArchiveHandle handle, char* fileName) : mHandle(handle),
+        mFileName(fileName)
+    {
+    }
 
-    /* parse the archive, prepping internal structures */
-    bool parseZipArchive(void);
-
-    /* add a new entry to the hash table */
-    void addToHash(const char* str, int strLen, unsigned int hash);
-
-    /* compute string hash code */
-    static unsigned int computeHash(const char* str, int len);
-
-    /* convert a ZipEntryRO back to a hash table index */
-    int entryToIndex(const ZipEntryRO entry) const;
-
-    /*
-     * One entry in the hash table.
-     */
-    typedef struct HashEntry {
-        const char*     name;
-        unsigned short  nameLen;
-        //unsigned int    hash;
-    } HashEntry;
-
-    /* open Zip archive */
-    int         mFd;
-
-    /* Lock for handling the file descriptor (seeks, etc) */
-    mutable Mutex mFdLock;
-
-    /* zip file name */
-    char*       mFileName;
-
-    /* length of file */
-    size_t      mFileLength;
-
-    /* mapped file */
-    FileMap*    mDirectoryMap;
-
-    /* number of entries in the Zip archive */
-    int         mNumEntries;
-
-    /* CD directory offset in the Zip archive */
-    off64_t     mDirectoryOffset;
-
-    /*
-     * We know how many entries are in the Zip archive, so we have a
-     * fixed-size hash table.  We probe for an empty slot.
-     */
-    int         mHashTableSize;
-    HashEntry*  mHashTable;
+    const ZipArchiveHandle mHandle;
+    char* mFileName;
 };
 
 }; // namespace android
commit	afd31e08299008fdc5c2813f21b2573f29dc53df	[log] [tgz]
author	Narayan Kamath <narayan@google.com>	Tue Dec 03 13:16:03 2013 +0000
committer	Narayan Kamath <narayan@google.com>	Mon Dec 09 16:23:16 2013 +0000
tree	7f83dd8f1e7ed71d4fe8ce280841e43ce275ac6d
parent	6e2d0c1d91f644ab50e0c0b7cae4306262a4ca41 [diff] [blame]