ART: Improve JitProfile perf in arm/arm64 mterp
ART currently requires two profiling-related things from the
interpreters: hotness updates and OSR switch checks. The hotness
updates previously used the existing instrumentation framework - which
is flexible, but quite heavyweight. For most things, the
instrumentation framework overhead is acceptable, but because we do a
hotness update on every backwards branch, the overhead is unacceptable.
Prior to this CL, branch profiling dominated interpreter cost.
Here, we bypass the instrumentation framework for hotness updates
and deliver a significant performance improvement. Running
interpreter-only (dalvikvm -Xint) on a Nexus 6, we see the logic
subtest of Caffeinemark improving from 2600 to 9200, and the
overall score going from 1979 to over 3000. Compared to the
C++ switch interpreter, we see a 6x improvement on the branchy logic
subtest and a 2.6x improvement overall.
Compared with the previous mterp, which did not have support for
JIT profiling, we see a small (1% to 5%) performance loss on the
standard command-line benchmarks. I consider this acceptable
(we could create an alternate non-profiling mterp which would
have no penalty, but I don't consider this overhead big enough to
justify that).
Change-Id: I50b5b8c5ed8ebda3c8b4e65d27ba7393c3feae04
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 12d6fdc..d70a7c4 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -22,6 +22,7 @@
#include "experimental_flags.h"
#include "interpreter_common.h"
#include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
#include "safe_math.h"
#include <memory> // std::unique_ptr
@@ -64,15 +65,20 @@
currentHandlersTable = handlersTable[ \
Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
-#define BRANCH_INSTRUMENTATION(offset) \
- do { \
- ArtMethod* method = shadow_frame.GetMethod(); \
- instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
- instrumentation->Branch(self, method, dex_pc, offset); \
- JValue result; \
- if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \
- return result; \
- } \
+#define BRANCH_INSTRUMENTATION(offset) \
+ do { \
+ instrumentation->Branch(self, method, dex_pc, offset); \
+ JValue result; \
+ if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \
+ return result; \
+ } \
+ } while (false)
+
+#define HOTNESS_UPDATE() \
+ do { \
+ if (jit_instrumentation_cache != nullptr) { \
+ jit_instrumentation_cache->AddSamples(self, method, 1); \
+ } \
} while (false)
#define UNREACHABLE_CODE_CHECK() \
@@ -186,6 +192,13 @@
UPDATE_HANDLER_TABLE();
std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
size_t lambda_captured_variable_index = 0;
+ const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+ ArtMethod* method = shadow_frame.GetMethod();
+ jit::Jit* jit = Runtime::Current()->GetJit();
+ jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
+ if (jit != nullptr) {
+ jit_instrumentation_cache = jit->GetInstrumentationCache();
+ }
// Jump to first instruction.
ADVANCE(0);
@@ -630,6 +643,7 @@
int8_t offset = inst->VRegA_10t(inst_data);
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -643,6 +657,7 @@
int16_t offset = inst->VRegA_20t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -656,6 +671,7 @@
int32_t offset = inst->VRegA_30t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -669,6 +685,7 @@
int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -682,6 +699,7 @@
int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -785,6 +803,7 @@
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -804,6 +823,7 @@
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -823,6 +843,7 @@
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -842,6 +863,7 @@
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -861,6 +883,7 @@
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -880,6 +903,7 @@
int16_t offset = inst->VRegC_22t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -898,6 +922,7 @@
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -916,6 +941,7 @@
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -934,6 +960,7 @@
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -952,6 +979,7 @@
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -970,6 +998,7 @@
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();
@@ -988,6 +1017,7 @@
int16_t offset = inst->VRegB_21t();
BRANCH_INSTRUMENTATION(offset);
if (IsBackwardBranch(offset)) {
+ HOTNESS_UPDATE();
if (UNLIKELY(self->TestAllFlags())) {
self->CheckSuspend();
UPDATE_HANDLER_TABLE();