Merge "Fix test failure due to wrong runtime arguments."
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 151437b..c37ceca 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -77,11 +77,10 @@
 
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
-    size_t size = sizeof(method_header) + code_size + vmap_table.size() + mapping_table_size +
-        gc_map_size;
-    size_t code_offset = compiled_method->AlignCode(size - code_size);
-    size_t padding = code_offset - (size - code_size);
-    chunk->reserve(padding + size);
+    const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
+    const size_t size =
+        gc_map_size + mapping_table_size + vmap_table.size() + sizeof(method_header) + code_size;
+    chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
     chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
@@ -91,10 +90,16 @@
     if (gc_map_used) {
       chunk->insert(chunk->begin(), gc_map.begin(), gc_map.end());
     }
-    chunk->insert(chunk->begin(), padding, 0);
     chunk->insert(chunk->end(), code.begin(), code.end());
-    CHECK_EQ(padding + size, chunk->size());
-    const void* code_ptr = &(*chunk)[code_offset];
+    CHECK_EQ(chunk->size(), size);
+    const void* unaligned_code_ptr = chunk->data() + (size - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = compiled_method->AlignCode(offset) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(chunk->capacity(), chunk->size() + padding);
+    chunk->insert(chunk->begin(), padding, 0);
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr, static_cast<const void*>(chunk->data() + (chunk->size() - code_size)));
     MakeExecutable(code_ptr, code.size());
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
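
Note on the chunk-building change above: the code's final address is only known once the vector's contents are in place, so the new logic reserves worst-case capacity first (content size plus one full alignment unit) and inserts the front padding afterwards; the reserve guarantees the insert cannot reallocate and invalidate unaligned_code_ptr. A minimal standalone sketch of the pattern, with kAlignment and MakeAlignedChunk as illustrative names rather than ART APIs:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr size_t kAlignment = 16;  // stand-in for GetInstructionSetAlignment().

    std::vector<uint8_t> MakeAlignedChunk(const std::vector<uint8_t>& header,
                                          const std::vector<uint8_t>& code) {
      std::vector<uint8_t> chunk;
      // Reserve the worst case up front: all content plus maximum padding.
      chunk.reserve(header.size() + code.size() + kAlignment);
      chunk.insert(chunk.end(), header.begin(), header.end());
      chunk.insert(chunk.end(), code.begin(), code.end());
      // Only now do we know where the code actually lives in memory.
      uintptr_t code_address =
          reinterpret_cast<uintptr_t>(chunk.data() + header.size());
      size_t padding = (kAlignment - code_address % kAlignment) % kAlignment;
      // Inserting at the front shifts the contents but, thanks to the reserve,
      // cannot reallocate, so the code ends up exactly at code_address + padding.
      assert(chunk.capacity() >= chunk.size() + padding);
      chunk.insert(chunk.begin(), padding, 0);
      assert((code_address + padding) % kAlignment == 0);
      return chunk;
    }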
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 6aed444..e6b9273 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3118,15 +3118,25 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? LocationSummary::kCallOnSlowPath
-                                                              : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  InvokeRuntimeCallingConvention calling_convention;
+  CodeGenerator::CreateLoadClassLocationSummary(
+      cls,
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+      Location::RegisterLocation(V0));
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
+  if (cls->NeedsAccessCheck()) {
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
+                            cls,
+                            cls->GetDexPc(),
+                            nullptr,
+                            IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    return;
+  }
+
   Register out = locations->Out().AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
   if (cls->IsReferrersClass()) {
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 26a05da..659da06 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -373,12 +373,18 @@
   if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
     // Calls to String.<init> are replaced with a StringFactory.
     if (kIsDebugBuild) {
-      ScopedObjectAccess soa(Thread::Current());
+      HInvoke* invoke = instr->AsInvoke();
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = cl->FindDexCache(
-          soa.Self(), instr->AsInvoke()->GetDexFile(), false);
-      ArtMethod* method = dex_cache->GetResolvedMethod(
-          instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
+      ScopedObjectAccess soa(Thread::Current());
+      StackHandleScope<2> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(
+          hs.NewHandle(cl->FindDexCache(soa.Self(), invoke->GetDexFile(), false)));
+      // Use a null loader. We should probably use the compiling method's class loader,
+      // but then we would need to pass it to RTPVisitor just for this debug check. Since
+      // the method is from the String class, the null loader is good enough.
+      Handle<mirror::ClassLoader> loader;
+      ArtMethod* method = cl->ResolveMethod(
+          invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
       DCHECK(declaring_class != nullptr);
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index a10d7af..fe0afa6 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -361,19 +361,6 @@
   return true;
 }
 
-ProfilingInfo* ArtMethod::CreateProfilingInfo() {
-  DCHECK(!Runtime::Current()->IsAotCompiler());
-  ProfilingInfo* info = ProfilingInfo::Create(this);
-  MemberOffset offset = ArtMethod::EntryPointFromJniOffset(sizeof(void*));
-  uintptr_t pointer = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-  if (!reinterpret_cast<Atomic<ProfilingInfo*>*>(pointer)->
-          CompareExchangeStrongSequentiallyConsistent(nullptr, info)) {
-    return GetProfilingInfo(sizeof(void*));
-  } else {
-    return info;
-  }
-}
-
 const uint8_t* ArtMethod::GetQuickenedInfo() {
   bool found = false;
   OatFile::OatMethod oat_method =
@@ -427,6 +414,12 @@
   bool found;
   OatFile::OatMethod oat_method = class_linker->FindOatMethodFor(this, &found);
   if (!found) {
+    if (class_linker->IsQuickResolutionStub(existing_entry_point)) {
+      // We are running the generic JNI stub, but the entry point of the method has not
+      // been updated yet.
+      DCHECK(IsNative());
+      return nullptr;
+    }
     // Only for unit tests.
     // TODO(ngeoffray): Update these tests to pass the right pc?
     return OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index bb9804e..551989d 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -305,12 +305,18 @@
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
-  ProfilingInfo* CreateProfilingInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
   ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
     return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
   }
 
+  ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
+    SetEntryPointFromJniPtrSize(info, sizeof(void*));
+  }
+
+  static MemberOffset ProfilingInfoOffset() {
+    return EntryPointFromJniOffset(sizeof(void*));
+  }
+
   void* GetEntryPointFromJni() {
     return GetEntryPointFromJniPtrSize(sizeof(void*));
   }
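
The two new accessors above work because a non-native method never uses its JNI entrypoint slot, so ART reuses that pointer-sized field to store the ProfilingInfo*. An illustrative sketch of the slot-reuse idea (Method and Profile are hypothetical stand-ins, not ART's real layout):

    #include <cstddef>

    struct Profile;  // stand-in for ProfilingInfo.

    struct Method {
      bool is_native = false;
      void* jni_entrypoint_or_profile = nullptr;  // one slot, two meanings.

      void SetProfile(Profile* p) {
        // Mirrors SetEntryPointFromJniPtrSize(): only meaningful when the
        // method is not native and therefore never uses the JNI slot.
        jni_entrypoint_or_profile = p;
      }
      Profile* GetProfile() const {
        return static_cast<Profile*>(jni_entrypoint_or_profile);
      }
      static size_t ProfileOffset() {
        // Exposed as a raw offset so JITed code can load the field directly,
        // like ProfilingInfoOffset() above.
        return offsetof(Method, jni_entrypoint_or_profile);
      }
    };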
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index b1d4d35..18ccd08 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -92,10 +92,25 @@
     fake_header_code_and_maps_.insert(fake_header_code_and_maps_.end(),
                                       fake_code_.begin(), fake_code_.end());
 
-    // NOTE: Don't align the code (it will not be executed) but check that the Thumb2
-    // adjustment will be a NOP, see EntryPointToCodePointer().
-    CHECK_ALIGNED(mapping_table_offset, 2);
-    const uint8_t* code_ptr = &fake_header_code_and_maps_[gc_map_offset];
+    // Align the code.
+    const size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+    fake_header_code_and_maps_.reserve(fake_header_code_and_maps_.size() + alignment);
+    const void* unaligned_code_ptr =
+        fake_header_code_and_maps_.data() + (fake_header_code_and_maps_.size() - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = RoundUp(offset, alignment) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(fake_header_code_and_maps_.capacity(), fake_header_code_and_maps_.size() + padding);
+    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(), padding, 0);
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr,
+             static_cast<const void*>(fake_header_code_and_maps_.data() +
+                                          (fake_header_code_and_maps_.size() - code_size)));
+
+    if (kRuntimeISA == kArm) {
+      // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer().
+      CHECK_ALIGNED(mapping_table_offset, 2);
+    }
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
     ASSERT_TRUE(method_f_ != nullptr);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2d0a2a5..2596dd9 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -21,6 +21,7 @@
 #include "art_method-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
+#include "jit/profiling_info.h"
 #include "linear_alloc.h"
 #include "mem_map.h"
 #include "oat_file-inl.h"
@@ -206,10 +207,23 @@
   // We do not check if a code cache GC is in progress, as this method comes
   // with the classlinker_classes_lock_ held, and suspending ourselves could
   // lead to a deadlock.
-  for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-    if (alloc.ContainsUnsafe(it->second)) {
-      FreeCode(it->first, it->second);
-      it = method_code_map_.erase(it);
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->second)) {
+        FreeCode(it->first, it->second);
+        it = method_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+    ProfilingInfo* info = *it;
+    if (alloc.ContainsUnsafe(info->GetMethod())) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+      it = profiling_infos_.erase(it);
     } else {
       ++it;
     }
@@ -387,6 +401,9 @@
     for (auto& it : method_code_map_) {
       it.second->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
     }
+    for (ProfilingInfo* info : profiling_infos_) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+    }
   }
 
   // Run a checkpoint on all threads to mark the JIT compiled code they are running.
@@ -400,27 +417,37 @@
     }
   }
 
-  // Free unused compiled code, and restore the entry point of used compiled code.
   {
     MutexLock mu(self, lock_);
     DCHECK_EQ(map_size, method_code_map_.size());
-    ScopedCodeCacheWrite scc(code_map_.get());
-    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-      const void* code_ptr = it->first;
-      ArtMethod* method = it->second;
-      uintptr_t allocation = FromCodeToAllocation(code_ptr);
-      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-      if (GetLiveBitmap()->Test(allocation)) {
-        method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
-        ++it;
-      } else {
-        method->ClearCounter();
-        DCHECK_NE(method->GetEntryPointFromQuickCompiledCode(), method_header->GetEntryPoint());
-        FreeCode(code_ptr, method);
-        it = method_code_map_.erase(it);
+    // Free unused compiled code, and restore the entry point of used compiled code.
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+        const void* code_ptr = it->first;
+        ArtMethod* method = it->second;
+        uintptr_t allocation = FromCodeToAllocation(code_ptr);
+        const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+        if (GetLiveBitmap()->Test(allocation)) {
+          method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
+          ++it;
+        } else {
+          method->ClearCounter();
+          DCHECK_NE(method->GetEntryPointFromQuickCompiledCode(), method_header->GetEntryPoint());
+          FreeCode(code_ptr, method);
+          it = method_code_map_.erase(it);
+        }
       }
     }
     GetLiveBitmap()->Bitmap::Clear();
+
+    // Free all profiling info.
+    for (ProfilingInfo* info : profiling_infos_) {
+      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
+      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+    }
+    profiling_infos_.clear();
+
     collection_in_progress_ = false;
     lock_cond_.Broadcast(self);
   }
@@ -460,5 +487,44 @@
   return method_header;
 }
 
+ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
+                                              ArtMethod* method,
+                                              const std::vector<uint32_t>& entries,
+                                              bool retry_allocation) {
+  ProfilingInfo* info = AddProfilingInfoInternal(self, method, entries);
+
+  if (info == nullptr && retry_allocation) {
+    GarbageCollectCache(self);
+    info = AddProfilingInfoInternal(self, method, entries);
+  }
+  return info;
+}
+
+ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self,
+                                                      ArtMethod* method,
+                                                      const std::vector<uint32_t>& entries) {
+  size_t profile_info_size = RoundUp(
+      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(void*));
+  ScopedThreadSuspension sts(self, kSuspended);
+  MutexLock mu(self, lock_);
+  WaitForPotentialCollectionToComplete(self);
+
+  // Check whether some other thread has concurrently created it.
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    return info;
+  }
+
+  uint8_t* data = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, profile_info_size));
+  if (data == nullptr) {
+    return nullptr;
+  }
+  info = new (data) ProfilingInfo(method, entries);
+  method->SetProfilingInfo(info);
+  profiling_infos_.push_back(info);
+  return info;
+}
+
 }  // namespace jit
 }  // namespace art
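
AddProfilingInfo and AddProfilingInfoInternal above follow an allocate-or-collect-and-retry shape: take the lock, wait out any collection, re-check whether another thread won the race, try the allocation, and only on failure run a collection and try once more. A simplified sketch under those assumptions, with a plain std::mutex and std::unordered_map standing in for lock_ and the mspace (Cache and Info are illustrative names):

    #include <cstddef>
    #include <cstdint>
    #include <mutex>
    #include <unordered_map>

    struct Info {
      explicit Info(uint32_t id) : id(id) {}
      uint32_t id;
    };

    class Cache {
     public:
      Info* Add(uint32_t method_id, bool retry_allocation) {
        Info* info = AddInternal(method_id);
        if (info == nullptr && retry_allocation) {
          Collect();  // like GarbageCollectCache: free unused entries.
          info = AddInternal(method_id);
        }
        return info;
      }

     private:
      Info* AddInternal(uint32_t method_id) {
        std::lock_guard<std::mutex> guard(lock_);
        // Re-check under the lock: another thread may have created it already.
        auto it = infos_.find(method_id);
        if (it != infos_.end()) {
          return it->second;
        }
        if (infos_.size() >= kCapacity) {
          return nullptr;  // the allocation-failure path.
        }
        Info* info = new Info(method_id);
        infos_.emplace(method_id, info);
        return info;
      }

      void Collect() { /* evict cold entries; elided in this sketch */ }

      static constexpr size_t kCapacity = 1024;
      std::mutex lock_;
      std::unordered_map<uint32_t, Info*> infos_;
    };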
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 4e415b8..e10f962 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -35,6 +35,7 @@
 
 class ArtMethod;
 class LinearAlloc;
+class ProfilingInfo;
 
 namespace jit {
 
@@ -109,11 +110,21 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Create a 'ProfilingInfo' for 'method'. If 'retry_allocation' is true,
+  // this will run a collection and retry if the first allocation is unsuccessful.
+  ProfilingInfo* AddProfilingInfo(Thread* self,
+                                  ArtMethod* method,
+                                  const std::vector<uint32_t>& entries,
+                                  bool retry_allocation)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Take ownership of code_mem_map.
   JitCodeCache(MemMap* code_map, MemMap* data_map);
@@ -133,6 +144,12 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ProfilingInfo* AddProfilingInfoInternal(Thread* self,
+                                          ArtMethod* method,
+                                          const std::vector<uint32_t>& entries)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // If a collection is in progress, wait for it to finish. Return
   // whether the thread actually waited.
   bool WaitForPotentialCollectionToComplete(Thread* self)
@@ -157,8 +174,10 @@
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
-  // This map holds compiled code associated to the ArtMethod
+  // This map holds compiled code associated with the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+  // ProfilingInfo objects we have allocated.
+  std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 666b8e7..2dd953b 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -26,7 +26,12 @@
 
 class JitCompileTask FINAL : public Task {
  public:
-  explicit JitCompileTask(ArtMethod* method) : method_(method) {
+  enum TaskKind {
+    kAllocateProfile,
+    kCompile
+  };
+
+  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
     ScopedObjectAccess soa(Thread::Current());
     // Add a global ref to the class to prevent class unloading until compilation is done.
     klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
@@ -40,9 +45,16 @@
 
   void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
-    VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+    if (kind_ == kCompile) {
+      VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
+        VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+      }
+    } else {
+      DCHECK(kind_ == kAllocateProfile);
+      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+      }
     }
   }
 
@@ -52,6 +64,7 @@
 
  private:
   ArtMethod* const method_;
+  const TaskKind kind_;
   jobject klass_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
@@ -85,14 +98,20 @@
   }
   uint16_t sample_count = method->IncrementCounter();
   if (sample_count == warm_method_threshold_) {
-    ProfilingInfo* info = method->CreateProfilingInfo();
-    if (info != nullptr) {
+    if (ProfilingInfo::Create(self, method, /* retry_allocation */ false)) {
       VLOG(jit) << "Start profiling " << PrettyMethod(method);
+    } else {
+      // Allocation failed. Instead of doing the collection on the Java thread, we push
+      // an allocation task to a compiler thread, which will do the collection.
+      thread_pool_->AddTask(self, new JitCompileTask(
+          method->GetInterfaceMethodIfProxy(sizeof(void*)), JitCompileTask::kAllocateProfile));
+      thread_pool_->StartWorkers(self);
     }
   }
+
   if (sample_count == hot_method_threshold_) {
     thread_pool_->AddTask(self, new JitCompileTask(
-        method->GetInterfaceMethodIfProxy(sizeof(void*))));
+        method->GetInterfaceMethodIfProxy(sizeof(void*)), JitCompileTask::kCompile));
     thread_pool_->StartWorkers(self);
   }
 }
@@ -107,14 +126,17 @@
                                                           ArtMethod* caller,
                                                           uint32_t dex_pc,
                                                           ArtMethod* callee ATTRIBUTE_UNUSED) {
+  // We make sure we cannot be suspended, as the profiling info can be concurrently deleted.
+  thread->StartAssertNoThreadSuspension("Instrumenting invoke");
   DCHECK(this_object != nullptr);
   ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
   if (info != nullptr) {
     // Since the instrumentation is marked from the declaring class we need to mark the card so
     // that mod-union tables and card rescanning know about the update.
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
-    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+    info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
+  thread->EndAssertNoThreadSuspension(nullptr);
 }
 
 void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
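
The sampling logic this file feeds (AddSamples, shown in part above) applies two thresholds: hitting the warm threshold allocates profiling state, falling back to a kAllocateProfile worker task when the non-blocking allocation fails, and hitting the hot threshold enqueues a kCompile task. A hedged sketch of that policy; the threshold values and helper functions are placeholders, not ART's:

    #include <cstdint>

    enum class TaskKind { kAllocateProfile, kCompile };

    // Stubs standing in for ProfilingInfo::Create and the JIT thread pool.
    bool TryCreateProfile(bool retry_allocation) { return retry_allocation; }
    void Enqueue(TaskKind /* kind */) {}

    void OnMethodSample(uint16_t sample_count) {
      constexpr uint16_t kWarmThreshold = 500;   // placeholder value
      constexpr uint16_t kHotThreshold = 10000;  // placeholder value
      if (sample_count == kWarmThreshold) {
        // Cheap, non-blocking attempt on the Java thread; a failure defers
        // the (collecting) allocation to a compiler thread.
        if (!TryCreateProfile(/* retry_allocation= */ false)) {
          Enqueue(TaskKind::kAllocateProfile);
        }
      }
      if (sample_count == kHotThreshold) {
        Enqueue(TaskKind::kCompile);
      }
    }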
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 7c5f78e..2e52b1b 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,7 +25,7 @@
 
 namespace art {
 
-ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
   DCHECK(!method->IsNative());
@@ -57,23 +57,15 @@
   // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
  // object: it will never be filled.
   if (entries.empty()) {
-    return nullptr;
+    return true;
   }
 
   // Allocate the `ProfilingInfo` object in the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
-  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
-  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
-
-  if (data == nullptr) {
-    VLOG(jit) << "Cannot allocate profiling info anymore";
-    return nullptr;
-  }
-
-  return new (data) ProfilingInfo(entries);
+  return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
@@ -84,7 +76,6 @@
   }
   DCHECK(cache != nullptr);
 
-  ScopedObjectAccess soa(self);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
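
The surrounding loop (partially visible above) implements a fixed-size inline cache: scan linearly, return on a hit, claim the first empty slot, and treat a full cache as megamorphic. A condensed sketch, with a raw Class* standing in for GcRoot<mirror::Class> and the cache size chosen as a placeholder:

    #include <cstddef>

    struct Class;  // stand-in for mirror::Class.

    struct InlineCache {
      static constexpr size_t kCacheSize = 5;  // placeholder size.
      Class* classes[kCacheSize] = {};

      void Add(Class* cls) {
        for (size_t i = 0; i < kCacheSize; ++i) {
          if (classes[i] == cls) {
            return;  // already recorded; nothing to do.
          }
          if (classes[i] == nullptr) {
            classes[i] = cls;  // claim the first empty slot.
            return;
          }
        }
        // All slots hold distinct classes: the call site is megamorphic and
        // the JIT can give up on devirtualizing it.
      }
    };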
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 7a2d1a8..b13a315 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -26,6 +26,10 @@
 
 class ArtMethod;
 
+namespace jit {
+class JitCodeCache;
+}
+
 namespace mirror {
 class Class;
 }
@@ -36,10 +40,17 @@
  */
 class ProfilingInfo {
  public:
-  static ProfilingInfo* Create(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
+  // Create a ProfilingInfo for 'method'. Return true if it succeeded, or if a
+  // ProfilingInfo is not needed because the method has no virtual/interface invocations.
+  static bool Create(Thread* self, ArtMethod* method, bool retry_allocation)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add information from an executed INVOKE instruction to the profile.
-  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+  void AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls)
+      // Method should not be interruptible, as it manipulates the ProfilingInfo
+      // which can be concurrently collected.
+      REQUIRES(Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
@@ -52,6 +63,10 @@
     }
   }
 
+  ArtMethod* GetMethod() const {
+    return method_;
+  }
+
  private:
   // Structure to store the classes seen at runtime for a specific instruction.
   // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
@@ -84,8 +99,9 @@
     GcRoot<mirror::Class> classes_[kIndividualCacheSize];
   };
 
-  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()) {
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       cache_[i].dex_pc = entries[i];
@@ -95,9 +111,14 @@
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
 
+  // Method this profiling info is for.
+  ArtMethod* const method_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
+  friend class jit::JitCodeCache;
+
   DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
 };
 
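
ProfilingInfo combines two idioms worth calling out: a trailing zero-length array (cache_[0]) and placement new into a buffer sized for the header plus `number_of_inline_caches_` entries (see AddProfilingInfoInternal in jit_code_cache.cc). A generic sketch of the idiom; Table, Entry, and MakeTable are made-up names, and ::operator new stands in for mspace_malloc:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <new>
    #include <vector>

    struct Entry {
      uint32_t dex_pc;
    };

    struct Table {
      explicit Table(const std::vector<uint32_t>& pcs) : count(pcs.size()) {
        std::memset(entries, 0, count * sizeof(Entry));
        for (size_t i = 0; i < count; ++i) {
          entries[i].dex_pc = pcs[i];
        }
      }
      const size_t count;
      Entry entries[0];  // trailing zero-length array, as in cache_[0].
    };

    Table* MakeTable(const std::vector<uint32_t>& pcs) {
      // Size the buffer for the header plus the trailing entries, then
      // construct in place; the real code uses mspace_malloc instead.
      size_t size = sizeof(Table) + sizeof(Entry) * pcs.size();
      void* data = ::operator new(size);
      return new (data) Table(pcs);
    }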
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index c9a2cfb..03cad08 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -43,6 +43,8 @@
   static OatQuickMethodHeader* FromCodePointer(const void* code_ptr) {
     uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
     uintptr_t header = code - OFFSETOF_MEMBER(OatQuickMethodHeader, code_);
+    DCHECK(IsAlignedParam(code, GetInstructionSetAlignment(kRuntimeISA)) ||
+           IsAlignedParam(header, GetInstructionSetAlignment(kRuntimeISA)));
     return reinterpret_cast<OatQuickMethodHeader*>(header);
   }
 
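
The DCHECK added above guards the arithmetic FromCodePointer depends on: the method header sits immediately before the code it describes, so subtracting the offset of the trailing code_ member from the code address recovers the header. A tiny sketch of that layout trick (Header is a made-up type, not the real OatQuickMethodHeader):

    #include <cstddef>
    #include <cstdint>

    struct Header {
      uint32_t frame_info;  // illustrative field, not the real layout.
      uint8_t code[0];      // the compiled code starts right after the header.
    };

    inline Header* HeaderFromCode(const void* code_ptr) {
      uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
      // The header lives immediately before the code, so step back by the
      // offset of the trailing code member, as FromCodePointer does.
      return reinterpret_cast<Header*>(code - offsetof(Header, code));
    }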
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 9aed271..80f0402 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -30,8 +30,6 @@
 
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
-# We use Quick's image on target because optimizing's image is not compiled debuggable.
-image="-Ximage:/data/art-test/core.art"
 args=$@
 debuggee_args="-Xcompiler-option --debuggable"
 device_dir="--device-dir=/data/local/tmp"
@@ -41,6 +39,8 @@
 image_compiler_option=""
 debug="no"
 verbose="no"
+image=""
+vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
 
@@ -88,7 +88,10 @@
   fi
 done
 
-vm_args="--vm-arg $image --vm-arg -Xusejit:true"
+if [[ "$image" != "" ]]; then
+  vm_args="--vm-arg $image"
+fi
+vm_args="$vm_args --vm-arg -Xusejit:true"
 debuggee_args="$debuggee_args -Xusejit:true"
 if [[ $debug == "yes" ]]; then
   art="$art -d"