jni: Fast path for @FastNative annotated java methods
Adds a faster path for java methods annotated with
dalvik.annotation.optimization.FastNative.
Intended to replace usage of fast JNI (registering with "!(FOO)BAR" descriptors).
Performance Microbenchmark Results (Angler):
* Regular JNI cost in nanoseconds: 115
* Fast JNI cost in nanoseconds: 60
* @FastNative cost in nanoseconds: 36
Summary: @FastNative JNI transitions are up to 67% faster than fast JNI (60 ns -> 36 ns).
Change-Id: Ic23823ae0f232270c068ec999fd89aa993894b0e
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 60975d4..d812590 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -334,6 +334,23 @@
return GetDeclaringClass()->IsInterface();
}
+bool ArtMethod::IsAnnotatedWithFastNative() {
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ StackHandleScope<1> shs(self);
+
+ const DexFile& dex_file = GetDeclaringClass()->GetDexFile();
+
+ mirror::Class* fast_native_annotation =
+ soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_annotation_optimization_FastNative);
+ Handle<mirror::Class> fast_native_handle(shs.NewHandle(fast_native_annotation));
+
+ // Note: Resolves any method annotations' classes as a side-effect.
+ // -- This seems allowed by the spec since it says we can preload any classes
+ //    referenced by another class's constant pool table.
+ return dex_file.IsMethodAnnotationPresent(this, fast_native_handle, DexFile::kDexVisibilityBuild);
+}
+
bool ArtMethod::EqualParameters(Handle<mirror::ObjectArray<mirror::Class>> params) {
auto* dex_cache = GetDexCache();
auto* dex_file = dex_cache->GetDexFile();
diff --git a/runtime/art_method.h b/runtime/art_method.h
index acf06fd..a90ef23 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -375,6 +375,10 @@
return (GetAccessFlags() & kAccMustCountLocks) != 0;
}
+ // Checks to see if the method was annotated with @dalvik.annotation.optimization.FastNative
+ // -- Independent of kAccFastNative access flags.
+ bool IsAnnotatedWithFastNative();
+
// Returns true if this method could be overridden by a default method.
bool IsOverridableByDefaultMethod() SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index d4cee44..e318f56 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -87,7 +87,7 @@
art::Thread::SelfOffset<POINTER_SIZE>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 197 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 199 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_pos.
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index a6eb5f6..90c678c 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1403,7 +1403,9 @@
return GetSignatureValue(method_class, annotation_set);
}
-bool DexFile::IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class)
+bool DexFile::IsMethodAnnotationPresent(ArtMethod* method,
+ Handle<mirror::Class> annotation_class,
+ uint32_t visibility /* = kDexVisibilityRuntime */)
const {
const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
if (annotation_set == nullptr) {
@@ -1411,8 +1413,10 @@
}
StackHandleScope<1> hs(Thread::Current());
Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
- const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
- method_class, annotation_set, kDexVisibilityRuntime, annotation_class);
+ const AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(method_class,
+ annotation_set,
+ visibility,
+ annotation_class);
return annotation_item != nullptr;
}
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 2eca495..59339ef 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -960,7 +960,9 @@
SHARED_REQUIRES(Locks::mutator_lock_);
mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForMethod(ArtMethod* method) const
SHARED_REQUIRES(Locks::mutator_lock_);
- bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class) const
+ bool IsMethodAnnotationPresent(ArtMethod* method,
+ Handle<mirror::Class> annotation_class,
+ uint32_t visibility = kDexVisibilityRuntime) const
SHARED_REQUIRES(Locks::mutator_lock_);
const AnnotationSetItem* FindAnnotationSetForClass(Handle<mirror::Class> klass) const
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index f98de95..2a206c2 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -73,11 +73,13 @@
// JNI
qpoints->pJniMethodStart = JniMethodStart;
+ qpoints->pJniMethodFastStart = JniMethodFastStart;
qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
qpoints->pJniMethodEnd = JniMethodEnd;
qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+ qpoints->pJniMethodFastEnd = JniMethodFastEnd;
qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
// Locks
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index f5b68fa..08e0d6e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -52,10 +52,13 @@
// JNI entrypoints.
// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
extern uint32_t JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern uint32_t JniMethodFastStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self)
+ NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 07f0394..74c928a 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -72,8 +72,10 @@
V(HandleFillArrayData, void, void*, void*) \
\
V(JniMethodStart, uint32_t, Thread*) \
+ V(JniMethodFastStart, uint32_t, Thread*) \
V(JniMethodStartSynchronized, uint32_t, jobject, Thread*) \
V(JniMethodEnd, void, uint32_t, Thread*) \
+ V(JniMethodFastEnd, void, uint32_t, Thread*) \
V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \
V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \
@@ -195,7 +197,8 @@
V(ReadBarrierMarkReg28, mirror::Object*, mirror::Object*) \
V(ReadBarrierMarkReg29, mirror::Object*, mirror::Object*) \
V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
- V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
+ V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \
+\
#endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
#undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ // #define is only for lint.
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 58f256a..c06824c 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -29,6 +29,21 @@
handle_on_stack->Assign(to_ref);
}
+// Called on entry to fast JNI, push a new local reference table only.
+extern uint32_t JniMethodFastStart(Thread* self) {
+ JNIEnvExt* env = self->GetJniEnv();
+ DCHECK(env != nullptr);
+ uint32_t saved_local_ref_cookie = env->local_ref_cookie;
+ env->local_ref_cookie = env->locals.GetSegmentState();
+
+ if (kIsDebugBuild) {
+ ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+ CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+ }
+
+ return saved_local_ref_cookie;
+}
+
// Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
extern uint32_t JniMethodStart(Thread* self) {
JNIEnvExt* env = self->GetJniEnv();
@@ -73,11 +88,32 @@
self->PopHandleScope();
}
+// TODO: These should probably be templatized or macro-ized.
+// Otherwise there's just too much repetitive boilerplate.
+
extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
GoToRunnable(self);
PopLocalReferences(saved_local_ref_cookie, self);
}
+extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
+ // inlined fast version of GoToRunnable(self);
+
+ if (kIsDebugBuild) {
+ ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+ CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+ }
+
+ if (UNLIKELY(self->TestAllFlags())) {
+ // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
+ // is a flag raised.
+ DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
+ self->CheckSuspend();
+ }
+
+ PopLocalReferences(saved_local_ref_cookie, self);
+}
+
extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
Thread* self) {
GoToRunnable(self);
@@ -85,6 +121,10 @@
PopLocalReferences(saved_local_ref_cookie, self);
}
+// TODO: JniMethodFastEndWithReference
+// (Probably don't need to have a synchronized variant since
+// it already has to do atomic operations)
+
// Common result handling for EndWithReference.
static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
uint32_t saved_local_ref_cookie,
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index e3203dc..004cdc4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -211,11 +211,14 @@
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObjectWithBoundCheck, pAputObject, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pHandleFillArrayData, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pHandleFillArrayData, pJniMethodStart, sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized,
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodFastStart,
+ sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastStart, pJniMethodStartSynchronized,
sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd,
sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodFastEnd, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEnd, pJniMethodEndSynchronized, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index e1a4e2a..c322475 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2205,6 +2205,7 @@
VLOG(jni) << "[Registering JNI native method " << PrettyMethod(m) << "]";
+ is_fast = is_fast || m->IsFastNative(); // Merge with @FastNative state.
m->RegisterNative(fnPtr, is_fast);
}
return JNI_OK;
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index c7e4f8b..2a040a3 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -487,7 +487,7 @@
args.SetIfMissing(M::ParallelGCThreads, gc::Heap::kDefaultEnableParallelGC ?
static_cast<unsigned int>(sysconf(_SC_NPROCESSORS_CONF) - 1u) : 0u);
- // -Xverbose:
+ // -verbose:
{
LogVerbosity *log_verbosity = args.Get(M::Verbose);
if (log_verbosity != nullptr) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3326736..b35a614 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2599,6 +2599,9 @@
QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29)
QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
+
+ QUICK_ENTRY_POINT_INFO(pJniMethodFastStart)
+ QUICK_ENTRY_POINT_INFO(pJniMethodFastEnd)
#undef QUICK_ENTRY_POINT_INFO
os << offset;
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 48deb35..ddce344 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -30,6 +30,7 @@
namespace art {
jclass WellKnownClasses::com_android_dex_Dex;
+jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
jclass WellKnownClasses::dalvik_system_DexFile;
jclass WellKnownClasses::dalvik_system_DexPathList;
jclass WellKnownClasses::dalvik_system_DexPathList__Element;
@@ -215,6 +216,7 @@
void WellKnownClasses::Init(JNIEnv* env) {
com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex");
+ dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile");
dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList");
dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index c9faf69..b8e05b8 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -41,6 +41,7 @@
SHARED_REQUIRES(Locks::mutator_lock_);
static jclass com_android_dex_Dex;
+ static jclass dalvik_annotation_optimization_FastNative;
static jclass dalvik_system_DexFile;
static jclass dalvik_system_DexPathList;
static jclass dalvik_system_DexPathList__Element;