jni: Support @FastNative methods that return objects

Bug: 32088975
Change-Id: I16f8b7ec6b251812af60ab25f2153d9b72f37044
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 567791e..cd8815b 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -98,7 +98,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 1ee1f81..df23f94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -78,6 +78,7 @@
   qpoints->pJniMethodEnd = JniMethodEnd;
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
   qpoints->pJniMethodFastEnd = JniMethodFastEnd;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 89712a3..915f18e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -65,6 +65,11 @@
 extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
                                                  Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern mirror::Object* JniMethodFastEndWithReference(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+
 
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
                                                              uint32_t saved_local_ref_cookie,
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e402919..3cfee45 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -78,6 +78,7 @@
   V(JniMethodFastEnd, void, uint32_t, Thread*) \
   V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \
   V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
+  V(JniMethodFastEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
   V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \
   V(QuickGenericJniTrampoline, void, ArtMethod*) \
 \
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index b25f447..330c742 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -22,6 +22,9 @@
 
 namespace art {
 
+template <bool kDynamicFast>
+static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+
 extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack,
                            Thread* self ATTRIBUTE_UNUSED) {
   DCHECK(kUseReadBarrier);
@@ -78,7 +81,28 @@
   bool is_fast = native_method->IsFastNative();
   if (!is_fast) {
     self->TransitionFromSuspendedToRunnable();
-  } else if (UNLIKELY(self->TestAllFlags())) {
+  } else {
+    GoToRunnableFast</*kDynamicFast*/true>(self);
+  }
+}
+
+// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
+template <bool kDynamicFast>
+ALWAYS_INLINE static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+  if (kIsDebugBuild) {
+    // Should only enter here if the method is !Fast JNI or @FastNative.
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+
+    if (kDynamicFast) {
+      CHECK(native_method->IsFastNative()) << native_method->PrettyMethod();
+    } else {
+      CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod();
+    }
+  }
+
+  // When we are in "fast" JNI or @FastNative, we are already Runnable.
+  // Only do a suspend check on the way out of JNI.
+  if (UNLIKELY(self->TestAllFlags())) {
     // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
     // is a flag raised.
     DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
@@ -106,20 +130,7 @@
 }
 
 extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
-  // inlined fast version of GoToRunnable(self);
-
-  if (kIsDebugBuild) {
-    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
-    CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod();
-  }
-
-  if (UNLIKELY(self->TestAllFlags())) {
-    // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
-    // is a flag raised.
-    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
-    self->CheckSuspend();
-  }
-
+  GoToRunnableFast</*kDynamicFast*/false>(self);
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
@@ -131,10 +142,6 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-// TODO: JniMethodFastEndWithReference
-// (Probably don't need to have a synchronized variant since
-// it already has to do atomic operations)
-
 // Common result handling for EndWithReference.
 static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
                                                              uint32_t saved_local_ref_cookie,
@@ -157,6 +164,13 @@
   return o.Ptr();
 }
 
+extern mirror::Object* JniMethodFastEndWithReference(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     Thread* self) {
+  GoToRunnableFast</*kDynamicFast*/false>(self);
+  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
+}
+
 extern mirror::Object* JniMethodEndWithReference(jobject result,
                                                  uint32_t saved_local_ref_cookie,
                                                  Thread* self) {
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 03254ab..cdb1051 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -122,9 +122,9 @@
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
@@ -223,6 +223,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
+                         pJniMethodFastEndWithReference, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEndWithReference,
                          pJniMethodEndWithReferenceSynchronized, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized,
                          pQuickGenericJniTrampoline, sizeof(void*));
diff --git a/runtime/oat.h b/runtime/oat.h
index 4d8687c..814a493 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '0', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.h b/runtime/thread.h
index 6f5913e..24038f5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1392,7 +1392,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
@@ -1506,12 +1506,13 @@
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
-    // Thread-local allocation pointer.
-    uint8_t* thread_local_start;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
+    // Thread-local allocation pointer.
+    uint8_t* thread_local_start;
+
     size_t thread_local_objects;
 
     // Mterp jump table bases.