ART: Swap-space in the compiler

Introduce a swap-space and corresponding allocator to transparently
switch native allocations to memory backed by a file.

Bug: 18596910

(cherry picked from commit 62746d8d9c4400e4764f162b22bfb1a32be287a9)

Change-Id: I131448f3907115054a592af73db86d2b9257ea33
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index cbb23c2..9985d66 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -62,6 +62,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
+#include "utils/swap_space.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 
@@ -339,8 +340,10 @@
                                bool image, std::set<std::string>* image_classes,
                                std::set<std::string>* compiled_classes, size_t thread_count,
                                bool dump_stats, bool dump_passes, CumulativeLogger* timer,
-                               const std::string& profile_file)
-    : profile_present_(false), compiler_options_(compiler_options),
+                               int swap_fd, const std::string& profile_file)
+    : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
+      swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())),
+      profile_present_(false), compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -349,7 +352,7 @@
       freezing_constructor_lock_("freezing constructor lock"),
       compiled_classes_lock_("compiled classes lock"),
       compiled_methods_lock_("compiled method lock"),
-      compiled_methods_(),
+      compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
       image_(image),
       image_classes_(image_classes),
@@ -361,12 +364,12 @@
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips),
-      dedupe_code_("dedupe code"),
-      dedupe_src_mapping_table_("dedupe source mapping table"),
-      dedupe_mapping_table_("dedupe mapping table"),
-      dedupe_vmap_table_("dedupe vmap table"),
-      dedupe_gc_map_("dedupe gc map"),
-      dedupe_cfi_info_("dedupe cfi info") {
+      dedupe_code_("dedupe code", *swap_space_allocator_),
+      dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_),
+      dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_),
+      dedupe_vmap_table_("dedupe vmap table", *swap_space_allocator_),
+      dedupe_gc_map_("dedupe gc map", *swap_space_allocator_),
+      dedupe_cfi_info_("dedupe cfi info", *swap_space_allocator_) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
@@ -393,31 +396,28 @@
   }
 }
 
-std::vector<uint8_t>* CompilerDriver::DeduplicateCode(const std::vector<uint8_t>& code) {
+SwapVector<uint8_t>* CompilerDriver::DeduplicateCode(const ArrayRef<const uint8_t>& code) {
   return dedupe_code_.Add(Thread::Current(), code);
 }
 
-SrcMap* CompilerDriver::DeduplicateSrcMappingTable(const SrcMap& src_map) {
+SwapSrcMap* CompilerDriver::DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map) {
   return dedupe_src_mapping_table_.Add(Thread::Current(), src_map);
 }
 
-std::vector<uint8_t>* CompilerDriver::DeduplicateMappingTable(const std::vector<uint8_t>& code) {
+SwapVector<uint8_t>* CompilerDriver::DeduplicateMappingTable(const ArrayRef<const uint8_t>& code) {
   return dedupe_mapping_table_.Add(Thread::Current(), code);
 }
 
-std::vector<uint8_t>* CompilerDriver::DeduplicateVMapTable(const std::vector<uint8_t>& code) {
+SwapVector<uint8_t>* CompilerDriver::DeduplicateVMapTable(const ArrayRef<const uint8_t>& code) {
   return dedupe_vmap_table_.Add(Thread::Current(), code);
 }
 
-std::vector<uint8_t>* CompilerDriver::DeduplicateGCMap(const std::vector<uint8_t>& code) {
+SwapVector<uint8_t>* CompilerDriver::DeduplicateGCMap(const ArrayRef<const uint8_t>& code) {
   return dedupe_gc_map_.Add(Thread::Current(), code);
 }
 
-std::vector<uint8_t>* CompilerDriver::DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info) {
-  if (cfi_info == nullptr) {
-    return nullptr;
-  }
-  return dedupe_cfi_info_.Add(Thread::Current(), *cfi_info);
+SwapVector<uint8_t>* CompilerDriver::DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info) {
+  return dedupe_cfi_info_.Add(Thread::Current(), cfi_info);
 }
 
 CompilerDriver::~CompilerDriver() {
@@ -428,7 +428,9 @@
   }
   {
     MutexLock mu(self, compiled_methods_lock_);
-    STLDeleteValues(&compiled_methods_);
+    for (auto& pair : compiled_methods_) {
+      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, pair.second);
+    }
   }
   compiler_->UnInit();
 }
@@ -2337,6 +2339,14 @@
   oss << " native alloc=" << PrettySize(allocated_space) << " free="
       << PrettySize(free_space);
 #endif
+  if (swap_space_.get() != nullptr) {
+    oss << " swap=" << PrettySize(swap_space_->GetSize());
+  }
+  oss << "\nCode dedupe: " << dedupe_code_.DumpStats();
+  oss << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats();
+  oss << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats();
+  oss << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats();
+  oss << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats();
   return oss.str();
 }
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index edc6468..7ddc32c 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,6 +39,8 @@
 #include "thread_pool.h"
 #include "utils/arena_allocator.h"
 #include "utils/dedupe_set.h"
+#include "utils/swap_space.h"
+#include "utils.h"
 #include "dex/verified_method.h"
 
 namespace art {
@@ -77,6 +79,8 @@
 };
 std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs);
 
+static constexpr bool kUseMurmur3Hash = true;
+
 class CompilerDriver {
  public:
   // Create a compiler targeting the requested "instruction_set".
@@ -93,7 +97,8 @@
                           bool image, std::set<std::string>* image_classes,
                           std::set<std::string>* compiled_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
-                          CumulativeLogger* timer, const std::string& profile_file);
+                          CumulativeLogger* timer, int swap_fd,
+                          const std::string& profile_file);
 
   ~CompilerDriver();
 
@@ -334,6 +339,9 @@
   const ArenaPool* GetArenaPool() const {
     return &arena_pool_;
   }
+  SwapAllocator<void>& GetSwapSpaceAllocator() {
+    return *swap_space_allocator_.get();
+  }
 
   bool WriteElf(const std::string& android_root,
                 bool is_host,
@@ -376,15 +384,12 @@
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       LOCKS_EXCLUDED(compiled_classes_lock_);
 
-  std::vector<uint8_t>* DeduplicateCode(const std::vector<uint8_t>& code);
-  SrcMap* DeduplicateSrcMappingTable(const SrcMap& src_map);
-  std::vector<uint8_t>* DeduplicateMappingTable(const std::vector<uint8_t>& code);
-  std::vector<uint8_t>* DeduplicateVMapTable(const std::vector<uint8_t>& code);
-  std::vector<uint8_t>* DeduplicateGCMap(const std::vector<uint8_t>& code);
-  std::vector<uint8_t>* DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info);
-
-  ProfileFile profile_file_;
-  bool profile_present_;
+  SwapVector<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
+  SwapSrcMap* DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map);
+  SwapVector<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& code);
+  SwapVector<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& code);
+  SwapVector<uint8_t>* DeduplicateGCMap(const ArrayRef<const uint8_t>& code);
+  SwapVector<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
 
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
@@ -484,6 +489,14 @@
   static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
+  // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first
+  // as other fields rely on this.
+  std::unique_ptr<SwapSpace> swap_space_;
+  std::unique_ptr<SwapAllocator<void> > swap_space_allocator_;
+
+  ProfileFile profile_file_;
+  bool profile_present_;
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -551,47 +564,92 @@
   bool support_boot_image_fixup_;
 
   // DeDuplication data structures, these own the corresponding byte arrays.
-  template <typename ByteArray>
+  template <typename ContentType>
   class DedupeHashFunc {
    public:
-    size_t operator()(const ByteArray& array) const {
-      // For small arrays compute a hash using every byte.
-      static const size_t kSmallArrayThreshold = 16;
-      size_t hash = 0x811c9dc5;
-      if (array.size() <= kSmallArrayThreshold) {
-        for (auto b : array) {
-          hash = (hash * 16777619) ^ static_cast<uint8_t>(b);
+    size_t operator()(const ArrayRef<ContentType>& array) const {
+      const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
+      static_assert(IsPowerOfTwo(sizeof(ContentType)),
+          "ContentType is not power of two, don't know whether array layout is as assumed");
+      uint32_t len = sizeof(ContentType) * array.size();
+      if (kUseMurmur3Hash) {
+        static constexpr uint32_t c1 = 0xcc9e2d51;
+        static constexpr uint32_t c2 = 0x1b873593;
+        static constexpr uint32_t r1 = 15;
+        static constexpr uint32_t r2 = 13;
+        static constexpr uint32_t m = 5;
+        static constexpr uint32_t n = 0xe6546b64;
+
+        uint32_t hash = 0;
+
+        const int nblocks = len / 4;
+        typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
+        const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data);
+        int i;
+        for (i = 0; i < nblocks; i++) {
+          uint32_t k = blocks[i];
+          k *= c1;
+          k = (k << r1) | (k >> (32 - r1));
+          k *= c2;
+
+          hash ^= k;
+          hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
         }
+
+        const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
+        uint32_t k1 = 0;
+
+        switch (len & 3) {
+          case 3:
+            k1 ^= tail[2] << 16;
+            FALLTHROUGH_INTENDED;
+          case 2:
+            k1 ^= tail[1] << 8;
+            FALLTHROUGH_INTENDED;
+          case 1:
+            k1 ^= tail[0];
+
+            k1 *= c1;
+            k1 = (k1 << r1) | (k1 >> (32 - r1));
+            k1 *= c2;
+            hash ^= k1;
+        }
+
+        hash ^= len;
+        hash ^= (hash >> 16);
+        hash *= 0x85ebca6b;
+        hash ^= (hash >> 13);
+        hash *= 0xc2b2ae35;
+        hash ^= (hash >> 16);
+
+        return hash;
       } else {
-        // For larger arrays use the 2 bytes at 6 bytes (the location of a push registers
-        // instruction field for quick generated code on ARM) and then select a number of other
-        // values at random.
-        static const size_t kRandomHashCount = 16;
-        for (size_t i = 0; i < 2; ++i) {
-          uint8_t b = static_cast<uint8_t>(array[i + 6]);
-          hash = (hash * 16777619) ^ b;
+        size_t hash = 0x811c9dc5;
+        for (uint32_t i = 0; i < len; ++i) {
+          hash = (hash * 16777619) ^ data[i];
         }
-        for (size_t i = 2; i < kRandomHashCount; ++i) {
-          size_t r = i * 1103515245 + 12345;
-          uint8_t b = static_cast<uint8_t>(array[r % array.size()]);
-          hash = (hash * 16777619) ^ b;
-        }
+        hash += hash << 13;
+        hash ^= hash >> 7;
+        hash += hash << 3;
+        hash ^= hash >> 17;
+        hash += hash << 5;
+        return hash;
       }
-      hash += hash << 13;
-      hash ^= hash >> 7;
-      hash += hash << 3;
-      hash ^= hash >> 17;
-      hash += hash << 5;
-      return hash;
     }
   };
 
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_code_;
-  DedupeSet<SrcMap, size_t, DedupeHashFunc<SrcMap>, 4> dedupe_src_mapping_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_mapping_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_vmap_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_gc_map_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc<std::vector<uint8_t>>, 4> dedupe_cfi_info_;
+  DedupeSet<ArrayRef<const uint8_t>,
+            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_code_;
+  DedupeSet<ArrayRef<SrcMapElem>,
+            SwapSrcMap, size_t, DedupeHashFunc<SrcMapElem>, 4> dedupe_src_mapping_table_;
+  DedupeSet<ArrayRef<const uint8_t>,
+            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_mapping_table_;
+  DedupeSet<ArrayRef<const uint8_t>,
+            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_vmap_table_;
+  DedupeSet<ArrayRef<const uint8_t>,
+            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_gc_map_;
+  DedupeSet<ArrayRef<const uint8_t>,
+            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_cfi_info_;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };