Merge "Implement the Reference.getReferent() intrinsic on ARM/ARM64 and add JVMTI thread start/end events."
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6c2934f..f5b6ebe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -7171,8 +7171,7 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather than trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
-  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+  if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
     __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
     return temp;
@@ -7180,7 +7179,8 @@
   return location.AsRegister<Register>();
 }
 
-void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -7229,6 +7229,11 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
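+  // Materialize the callee method first; the call is emitted below per code pointer location.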
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 52d1857..df2dbc7 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -456,6 +456,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 8bd18ee..9762ee8 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3993,7 +3993,8 @@
   return desired_dispatch_info;
 }
 
-void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                    Location temp) {
   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -4047,6 +4048,12 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index a9dca92..7d3c655 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -527,6 +527,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 64bcd62..ffaf18f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7262,7 +7262,7 @@
   return RegisterFrom(location);
 }
 
-void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
+Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
     HInvokeStaticOrDirect* invoke, Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -7313,6 +7313,12 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                      Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
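+  // The actual branch or call is emitted below, depending on the code pointer location.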
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index be65353..8ae3b7d 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -537,6 +537,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8f64fae..c262cf9 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2592,6 +2592,58 @@
   __ Lsr(out, out, 5);
 }
 
+void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
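+  // The slow path falls back to the regular Reference.getReferent() call.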
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0)).AsRegister<Register>();
+
+  // Now get declaring class.
+  __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent using intrinsic.
+  __ ldr(IP, Address(temp, disable_flag_offset));
+  __ ldr(temp, Address(temp, slow_path_flag_offset));
+  __ orr(IP, IP, ShifterOperand(temp));
+  __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+
+  // Fast path.
+  __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
@@ -2605,7 +2657,6 @@
 UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d8a896e..bbf826c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2773,7 +2773,65 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register obj = InputRegisterAt(invoke, 0);
+  Register out = OutputRegister(invoke);
+
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
+                                 invoke_direct, locations->GetTemp(0)));
+
+  // Now get declaring class.
+  __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent using intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    // Load two adjacent flags in one 64-bit load.
+    __ Ldr(temp0, MemOperand(temp0, disable_flag_offset));
+  } else {
+    UseScratchRegisterScope temps(masm);
+    Register temp1 = temps.AcquireW();
+    __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset));
+    __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset));
+    __ Orr(temp0, temp1, temp0);
+  }
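+  // If either flag is set, take the slow path and perform the regular call.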
+  __ Cbnz(temp0, slow_path->GetEntryLabel());
+
+  // Fast path.
+  __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 85e84d8..68c2d2e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2688,6 +2688,60 @@
   __ Lsr(out, out, 5);
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register obj = InputRegisterAt(invoke, 0);
+  vixl32::Register out = OutputRegister(invoke);
+
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0)));
+
+  // Now get declaring class.
+  __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent using intrinsic.
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp1 = temps.Acquire();
+  __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
+  __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
+  __ Orr(temp0, temp1, temp0);
+  __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());
+
+  // Fast path.
+  __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  assembler->MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
@@ -2701,7 +2755,6 @@
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index fcedd4e..be10378 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1309,6 +1309,7 @@
     PhaseUtil::SetToOnLoad();
   }
   PhaseUtil::Register(&gEventHandler);
+  ThreadUtil::Register(&gEventHandler);
 
   runtime->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
   runtime->AddSystemWeakHolder(&gObjectTagTable);
@@ -1316,6 +1317,13 @@
   return true;
 }
 
+extern "C" bool ArtPlugin_Deinitialize() {
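+  // Undo the registrations performed in ArtPlugin_Initialize.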
+  PhaseUtil::Unregister();
+  ThreadUtil::Unregister();
+
+  return true;
+}
+
 // The actual struct holding all of the entrypoints into the jvmti interface.
 const jvmtiInterface_1 gJvmtiInterface = {
   nullptr,  // reserved1
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index 85d6b72..62c3ebe 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -125,5 +125,11 @@
   art::Runtime::Current()->GetRuntimeCallbacks()->AddRuntimePhaseCallback(&gPhaseCallback);
 }
 
+void PhaseUtil::Unregister() {
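+  // Mirrors PhaseUtil::Register: remove the phase callback with all threads suspended.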
+  art::ScopedThreadStateChange stsc(art::Thread::Current(),
+                                    art::ThreadState::kWaitingForDebuggerToAttach);
+  art::ScopedSuspendAll ssa("Remove phase callback");
+  art::Runtime::Current()->GetRuntimeCallbacks()->RemoveRuntimePhaseCallback(&gPhaseCallback);
+}
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_phase.h b/runtime/openjdkjvmti/ti_phase.h
index 054652a..bd15fa6 100644
--- a/runtime/openjdkjvmti/ti_phase.h
+++ b/runtime/openjdkjvmti/ti_phase.h
@@ -44,6 +44,7 @@
   static jvmtiError GetPhase(jvmtiEnv* env, jvmtiPhase* phase_ptr);
 
   static void Register(EventHandler* event_handler);
+  static void Unregister();
 
   // Move the phase from uninitialized to LOAD.
   static void SetToOnLoad();
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index 970cc24..bf79570 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -31,10 +31,12 @@
 
 #include "ti_thread.h"
 
+#include "android-base/strings.h"
 #include "art_field.h"
 #include "art_jvmti.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "events-inl.h"
 #include "gc/system_weak.h"
 #include "gc_root-inl.h"
 #include "jni_internal.h"
@@ -43,6 +45,8 @@
 #include "mirror/string.h"
 #include "obj_ptr.h"
 #include "runtime.h"
+#include "runtime_callbacks.h"
+#include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
@@ -50,6 +54,76 @@
 
 namespace openjdkjvmti {
 
+struct ThreadCallback : public art::ThreadLifecycleCallback, public art::RuntimePhaseCallback {
+  jthread GetThreadObject(art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (self->GetPeer() == nullptr) {
+      return nullptr;
+    }
+    return self->GetJniEnv()->AddLocalReference<jthread>(self->GetPeer());
+  }
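+  // Dispatch the given JVMTI thread event for |self| using its JNI environment and peer.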
+  void Post(art::Thread* self, ArtJvmtiEvent type) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    DCHECK_EQ(self, art::Thread::Current());
+    ScopedLocalRef<jthread> thread(self->GetJniEnv(), GetThreadObject(self));
+    event_handler->DispatchEvent(self, type, self->GetJniEnv(), thread.get());
+  }
+
+  void ThreadStart(art::Thread* self) OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (!started) {
+      // Runtime isn't started. We only expect at most the signal handler or JIT threads to be
+      // started here.
+      if (art::kIsDebugBuild) {
+        std::string name;
+        self->GetThreadName(name);
+        if (name != "Signal Catcher" && !android::base::StartsWith(name, "Jit thread pool")) {
+          LOG(FATAL) << "Unexpected thread before start: " << name;
+        }
+      }
+      return;
+    }
+    Post(self, ArtJvmtiEvent::kThreadStart);
+  }
+
+  void ThreadDeath(art::Thread* self) OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    Post(self, ArtJvmtiEvent::kThreadEnd);
+  }
+
+  void NextRuntimePhase(RuntimePhase phase) OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (phase == RuntimePhase::kInit) {
+      // We moved to VMInit. Report the main thread as started (it was attached early, and must
+      // not be reported until Init).
+      started = true;
+      Post(art::Thread::Current(), ArtJvmtiEvent::kThreadStart);
+    }
+  }
+
+  EventHandler* event_handler = nullptr;
+  bool started = false;
+};
+
+ThreadCallback gThreadCallback;
+
+void ThreadUtil::Register(EventHandler* handler) {
+  art::Runtime* runtime = art::Runtime::Current();
+
+  gThreadCallback.started = runtime->IsStarted();
+  gThreadCallback.event_handler = handler;
+
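+  // Suspend all threads while the callbacks are installed.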
+  art::ScopedThreadStateChange stsc(art::Thread::Current(),
+                                    art::ThreadState::kWaitingForDebuggerToAttach);
+  art::ScopedSuspendAll ssa("Add thread callback");
+  runtime->GetRuntimeCallbacks()->AddThreadLifecycleCallback(&gThreadCallback);
+  runtime->GetRuntimeCallbacks()->AddRuntimePhaseCallback(&gThreadCallback);
+}
+
+void ThreadUtil::Unregister() {
+  art::ScopedThreadStateChange stsc(art::Thread::Current(),
+                                    art::ThreadState::kWaitingForDebuggerToAttach);
+  art::ScopedSuspendAll ssa("Remove thread callback");
+  art::Runtime* runtime = art::Runtime::Current();
+  runtime->GetRuntimeCallbacks()->RemoveThreadLifecycleCallback(&gThreadCallback);
+  runtime->GetRuntimeCallbacks()->RemoveRuntimePhaseCallback(&gThreadCallback);
+}
+
 jvmtiError ThreadUtil::GetCurrentThread(jvmtiEnv* env ATTRIBUTE_UNUSED, jthread* thread_ptr) {
   art::Thread* self = art::Thread::Current();
 
diff --git a/runtime/openjdkjvmti/ti_thread.h b/runtime/openjdkjvmti/ti_thread.h
index 5aaec58..f6f93ee 100644
--- a/runtime/openjdkjvmti/ti_thread.h
+++ b/runtime/openjdkjvmti/ti_thread.h
@@ -37,8 +37,13 @@
 
 namespace openjdkjvmti {
 
+class EventHandler;
+
 class ThreadUtil {
  public:
+  static void Register(EventHandler* event_handler);
+  static void Unregister();
+
   static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr);
 
   static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 4a32abe..06cd7ff 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -735,6 +735,13 @@
                             GetInstructionSetString(kRuntimeISA));
   }
 
+  // Send the initialized phase event. Send it before starting daemons, as otherwise
+  // sending thread events becomes complicated.
+  {
+    ScopedObjectAccess soa(self);
+    callbacks_->NextRuntimePhase(RuntimePhaseCallback::RuntimePhase::kInit);
+  }
+
   StartDaemonThreads();
 
   {
@@ -756,12 +763,6 @@
                  0);
   }
 
-  // Send the initialized phase event.
-  {
-    ScopedObjectAccess soa(self);
-    callbacks_->NextRuntimePhase(RuntimePhaseCallback::RuntimePhase::kInit);
-  }
-
   return true;
 }
 
diff --git a/test/924-threads/expected.txt b/test/924-threads/expected.txt
index 3b7fb24..67d20eb 100644
--- a/test/924-threads/expected.txt
+++ b/test/924-threads/expected.txt
@@ -31,3 +31,7 @@
 [Thread[FinalizerDaemon,5,system], Thread[FinalizerWatchdogDaemon,5,system], Thread[HeapTaskDaemon,5,system], Thread[ReferenceQueueDaemon,5,system], Thread[Signal Catcher,5,system], Thread[main,5,main]]
 JVMTI_ERROR_THREAD_NOT_ALIVE
 JVMTI_ERROR_THREAD_NOT_ALIVE
+Constructed thread
+Thread(EventTestThread): start
+Thread(EventTestThread): end
+Thread joined
diff --git a/test/924-threads/src/Main.java b/test/924-threads/src/Main.java
index 58695f7..29c4aa3 100644
--- a/test/924-threads/src/Main.java
+++ b/test/924-threads/src/Main.java
@@ -56,6 +56,8 @@
     doAllThreadsTests();
 
     doTLSTests();
+
+    doTestEvents();
   }
 
   private static class Holder {
@@ -226,6 +228,22 @@
     }
   }
 
+  private static void doTestEvents() throws Exception {
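+    // Expect a start and an end event for EventTestThread while events are enabled.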
+    enableThreadEvents(true);
+
+    Thread t = new Thread("EventTestThread");
+
+    System.out.println("Constructed thread");
+    Thread.yield();
+
+    t.start();
+    t.join();
+
+    System.out.println("Thread joined");
+
+    enableThreadEvents(false);
+  }
+
   private final static Comparator<Thread> THREAD_COMP = new Comparator<Thread>() {
     public int compare(Thread o1, Thread o2) {
       return o1.getName().compareTo(o2.getName());
@@ -293,4 +311,5 @@
   private static native Thread[] getAllThreads();
   private static native void setTLS(Thread t, long l);
   private static native long getTLS(Thread t);
+  private static native void enableThreadEvents(boolean b);
 }
diff --git a/test/924-threads/threads.cc b/test/924-threads/threads.cc
index d35eaa8..0380433 100644
--- a/test/924-threads/threads.cc
+++ b/test/924-threads/threads.cc
@@ -137,5 +137,71 @@
   JvmtiErrorToException(env, result);
 }
 
+static void JNICALL ThreadEvent(jvmtiEnv* jvmti_env,
+                                JNIEnv* jni_env,
+                                jthread thread,
+                                bool is_start) {
+  jvmtiThreadInfo info;
+  jvmtiError result = jvmti_env->GetThreadInfo(thread, &info);
+  if (result != JVMTI_ERROR_NONE) {
+    printf("Error getting thread info\n");
+    return;
+  }
+  printf("Thread(%s): %s\n", info.name, is_start ? "start" : "end");
+
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(info.name));
+  jni_env->DeleteLocalRef(info.thread_group);
+  jni_env->DeleteLocalRef(info.context_class_loader);
+}
+
+static void JNICALL ThreadStart(jvmtiEnv* jvmti_env,
+                                JNIEnv* jni_env,
+                                jthread thread) {
+  ThreadEvent(jvmti_env, jni_env, thread, true);
+}
+
+static void JNICALL ThreadEnd(jvmtiEnv* jvmti_env,
+                              JNIEnv* jni_env,
+                              jthread thread) {
+  ThreadEvent(jvmti_env, jni_env, thread, false);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_enableThreadEvents(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jboolean b) {
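+  // Enable or disable the THREAD_START/THREAD_END callbacks depending on |b|.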
+  if (b == JNI_FALSE) {
+    jvmtiError ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
+                                                         JVMTI_EVENT_THREAD_START,
+                                                         nullptr);
+    if (JvmtiErrorToException(env, ret)) {
+      return;
+    }
+    ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
+                                              JVMTI_EVENT_THREAD_END,
+                                              nullptr);
+    JvmtiErrorToException(env, ret);
+    return;
+  }
+
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.ThreadStart = ThreadStart;
+  callbacks.ThreadEnd = ThreadEnd;
+  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (JvmtiErrorToException(env, ret)) {
+    return;
+  }
+
+  ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                            JVMTI_EVENT_THREAD_START,
+                                            nullptr);
+  if (JvmtiErrorToException(env, ret)) {
+    return;
+  }
+  ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                            JVMTI_EVENT_THREAD_END,
+                                            nullptr);
+  JvmtiErrorToException(env, ret);
+}
+
 }  // namespace Test924Threads
 }  // namespace art