Compiler tuning

Significant reduction in memory usage by the compiler.
    o Estimated sizes of growable lists to avoid waste
    o Changed basic block predecessor structure from a growable bitmap
      to a growable list.
    o Conditionalized code which produced disassembly strings.
    o Avoided generating some dataflow-related structures when compiling
      in dataflow-disabled mode.
    o Added memory usage statistics
    o Eliminated floating point usage as a barrier to disabling expensive
      dataflow analysis for very large init routines.
    o Because iterating through sparse bit maps is much less of a concern now,
      removed earlier hack that remembered runs of leading and trailing
      zeroes.

Also, some general tuning.
    o Minor tweaks to register utilities
    o Speed up the assembly loop
    o Rewrite of the bit vector iterator

Our previous worst-case method originally consumed 360 megabytes, but through
earlier changes was whittled down to 113 megabytes.  Now it consumes 12 (which
so far appears to be close to the highest compiler heap usage of anything
I've seen).

Post-wipe cold boot time is now less than 7 minutes.

Installation time for our application test cases also shows a large
gain - typically 25% to 40% speedup.

Single-threaded host compilation of core.jar down to <3.0s, boot.oat builds
in 17.2s.  Next up: multi-threaded compilation.

Change-Id: I493d0d584c4145a6deccdd9bff344473023deb46
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h
index a68ee5f..ba449a4 100644
--- a/src/compiler/CompilerUtility.h
+++ b/src/compiler/CompilerUtility.h
@@ -27,6 +27,9 @@
 /* Allocate the initial memory block for arena-based allocation */
 bool oatHeapInit(void);
 
+/* Collect memory usage statistics */
+//#define WITH_MEMSTATS
+
 typedef struct ArenaMemBlock {
     size_t blockSize;
     size_t bytesAllocated;
@@ -34,7 +37,7 @@
     char ptr[0];
 } ArenaMemBlock;
 
-void* oatNew(size_t size, bool zero);
+void* oatNew(size_t size, bool zero, oatAllocKind kind = kAllocMisc);
 
 void oatArenaReset(void);
 
@@ -42,6 +45,9 @@
     size_t numAllocated;
     size_t numUsed;
     intptr_t *elemList;
+#ifdef WITH_MEMSTATS
+    oatListKind kind;
+#endif
 } GrowableList;
 
 typedef struct GrowableListIterator {
@@ -58,20 +64,11 @@
  */
 struct ArenaBitVector {
     bool    expandable;     /* expand bitmap if we run out? */
-    bool    firstDirty;     /* when true, don't believe firstBitSet */
-    bool    lastDirty;      /* when true, don't believe lastBitSet */
     u4      storageSize;    /* current size, in 32-bit words */
     u4*     storage;
-                            /*
-                             * Opportunistically remember first and
-                             * last set bits.  This yeilds a performance
-                             * advantage in cases where large
-                             * sparse vectors are repeatedly scanned
-                             * (something that can happen a lot during
-                             * dataflow analysis.
-                             */
-    int     firstBitSet;
-    int     lastBitSet;
+#ifdef WITH_MEMSTATS
+    oatBitMapKind kind;      /* for memory use tuning */
+#endif
 };
 
 /* Handy iterator to walk through the bit positions set to 1 */
@@ -90,14 +87,17 @@
 struct BasicBlock;
 struct CompilationUnit;
 
-void oatInitGrowableList(GrowableList* gList, size_t initLength);
+void oatInitGrowableList(GrowableList* gList, size_t initLength,
+                         oatListKind kind = kListMisc);
 void oatInsertGrowableList(GrowableList* gList, intptr_t elem);
+void oatDeleteGrowableList(GrowableList* gList, intptr_t elem);
 void oatGrowableListIteratorInit(GrowableList* gList,
                                  GrowableListIterator* iterator);
 intptr_t oatGrowableListIteratorNext(GrowableListIterator* iterator);
 intptr_t oatGrowableListGetElement(const GrowableList* gList, size_t idx);
 
-ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable);
+ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable,
+                                  oatBitMapKind = kBitMapMisc);
 void oatBitVectorIteratorInit(ArenaBitVector* pBits,
                               ArenaBitVectorIterator* iterator);
 int oatBitVectorIteratorNext(ArenaBitVectorIterator* iterator);
@@ -125,6 +125,7 @@
 void oatGetBlockName(struct BasicBlock* bb, char* name);
 const char* oatGetShortyFromTargetIdx(CompilationUnit*, int);
 void oatDumpRegLocTable(struct RegLocation*, int);
+void oatDumpMemStats(CompilationUnit* cUnit);
 
 }  // namespace art