ART: Optimize StringBuilder append pattern.
Recognize appending with StringBuilder and replace the
entire expression with a runtime call that performs the
append in a more efficient manner.
For now, require the entire pattern to be in a single block
and be very strict about the environment uses of the StringBuilder.
Also, do not accept StringBuilder/char[]/Object/float/double
arguments, as appending them can throw non-OOME exceptions
and/or requires a call from the entrypoint back to a helper
function in Java; these shall be implemented later.
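As an illustration (not the exact shapes accepted by the
recognizer), the target is ordinary String concatenation,
which dex compilers lower to a StringBuilder append chain;
a hypothetical example:
    // Java source:
    String label(String name, int count) {
        return "Hello " + name + ", you have " + count + " messages.";
    }
    // javac/d8 lower this to (roughly):
    //   new StringBuilder().append("Hello ").append(name)
    //       .append(", you have ").append(count)
    //       .append(" messages.").toString()
    // and the recognized chain is replaced by one runtime call.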
Boot image size for aosp_taimen-userdebug:
- before:
arm/boot*.oat: 19653872
arm64/boot*.oat: 23292784
oat/arm64/services.odex: 22408664
- after:
arm/boot*.oat: 19432184 (-216KiB)
arm64/boot*.oat: 22992488 (-293KiB)
oat/arm64/services.odex: 22376776 (-31KiB)
Note that const-string in compiled boot image methods cannot
throw, but for apps it can and therefore its environment can
prevent the optimization for apps. We could implement either
a simple carve-out for const-string or generic environment
pruning to allow this pattern to be applied more often.
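For example (illustrative only), in
    String s = millis + " ms";
the load of the " ms" literal is a const-string in the middle
of the append chain; in an app it can throw, and its environment
then references the StringBuilder under construction, which the
strict environment check above rejects.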
Results for the new StringBuilderAppendBenchmark on taimen:
timeAppendLongStrings: ~700ns -> ~200ns
timeAppendStringAndInt: ~220ns -> ~140ns
timeAppendStrings: ~200ns -> ~130ns
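For context on what these numbers measure: vogar runs
Caliper-style benchmarks whose time* methods take an iteration
count. A hypothetical sketch (field names and body are
assumptions, not the actual StringBuilderAppendBenchmark source):
    public class StringBuilderAppendBenchmark {
        static String string1 = "s1";
        static String string2 = "s2";
        public void timeAppendStrings(int count) {
            String s1 = string1;
            String s2 = string2;
            int sum = 0;
            for (int i = 0; i < count; ++i) {
                String result = s1 + s2;  // Compiled to the append pattern.
                sum += result.length();
            }
            if (sum != count * (s1.length() + s2.length())) {
                throw new AssertionError();
            }
        }
    }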
Bug: 19575890
Test: 697-checker-string-append
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: vogar --benchmark art/benchmark/stringbuilder-append/src/StringBuilderAppendBenchmark.java
Change-Id: I51789bf299f5219f68ada4c077b6a1d3fe083964
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2bbb570..3b5699b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -64,6 +64,7 @@
#include "ssa_liveness_analysis.h"
#include "stack_map.h"
#include "stack_map_stream.h"
+#include "string_builder_append.h"
#include "thread-current-inl.h"
#include "utils/assembler.h"
@@ -599,6 +600,57 @@
InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
}
+void CodeGenerator::CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction,
+                                                       Location out) {
+  ArenaAllocator* allocator = GetGraph()->GetAllocator();
+  LocationSummary* locations =
+      new (allocator) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  locations->SetOut(out);
+  instruction->GetLocations()->SetInAt(instruction->FormatIndex(),
+                                       Location::ConstantLocation(instruction->GetFormat()));
+
+  uint32_t format = static_cast<uint32_t>(instruction->GetFormat()->GetValue());
+  uint32_t f = format;
+  PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  size_t stack_offset = static_cast<size_t>(pointer_size);  // Start after the ArtMethod*.
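+  // Decode the packed format word: each argument occupies kBitsPerArg bits,
+  // least significant first, and its type decides whether it takes a single
+  // or a double stack slot in the outgoing arguments area.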
+  for (size_t i = 0, num_args = instruction->GetNumberOfArguments(); i != num_args; ++i) {
+    StringBuilderAppend::Argument arg_type =
+        static_cast<StringBuilderAppend::Argument>(f & StringBuilderAppend::kArgMask);
+    switch (arg_type) {
+      case StringBuilderAppend::Argument::kStringBuilder:
+      case StringBuilderAppend::Argument::kString:
+      case StringBuilderAppend::Argument::kCharArray:
+        static_assert(sizeof(StackReference<mirror::Object>) == sizeof(uint32_t), "Size check.");
+        FALLTHROUGH_INTENDED;
+      case StringBuilderAppend::Argument::kBoolean:
+      case StringBuilderAppend::Argument::kChar:
+      case StringBuilderAppend::Argument::kInt:
+      case StringBuilderAppend::Argument::kFloat:
+        locations->SetInAt(i, Location::StackSlot(stack_offset));
+        break;
+      case StringBuilderAppend::Argument::kLong:
+      case StringBuilderAppend::Argument::kDouble:
+        stack_offset = RoundUp(stack_offset, sizeof(uint64_t));
+        locations->SetInAt(i, Location::DoubleStackSlot(stack_offset));
+        // Skip the low word, let the common code skip the high word.
+        stack_offset += sizeof(uint32_t);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected arg format: 0x" << std::hex
+            << (f & StringBuilderAppend::kArgMask) << " full format: 0x" << format;
+        UNREACHABLE();
+    }
+    f >>= StringBuilderAppend::kBitsPerArg;
+    stack_offset += sizeof(uint32_t);
+  }
+  DCHECK_EQ(f, 0u);
+
+  size_t param_size = stack_offset - static_cast<size_t>(pointer_size);
+  DCHECK_ALIGNED(param_size, kVRegSize);
+  size_t num_vregs = param_size / kVRegSize;
+  graph_->UpdateMaximumNumberOfOutVRegs(num_vregs);
+}
+
void CodeGenerator::CreateUnresolvedFieldLocationSummary(
    HInstruction* field_access,
    DataType::Type field_type,