ART: Optimize StringBuilder append pattern.

Recognize appending with StringBuilder and replace the
entire expression with a runtime call that perfoms the
append in a more efficient manner.

For now, require the entire pattern to be in a single block
and be very strict about the StringBuilder environment uses.
Also, do not accept StringBuilder/char[]/Object/float/double
arguments as they throw non-OOME exceptions and/or require a
call from the entrypoint back to a helper function in Java;
these shall be implemented later.

Boot image size for aosp_taimen-userdebug:
 - before:
   arm/boot*.oat: 19653872
   arm64/boot*.oat: 23292784
   oat/arm64/services.odex: 22408664
 - after:
   arm/boot*.oat: 19432184 (-216KiB)
   arm64/boot*.oat: 22992488 (-293KiB)
   oat/arm64/services.odex: 22376776 (-31KiB)
Note that const-string in compiled boot image methods cannot
throw, but for apps it can and therefore its environment can
prevent the optimization for apps. We could implement either
a simple carve-out for const-string or generic environment
pruning to allow this pattern to be applied more often.

Results for the new StringBuilderAppendBenchmark on taimen:
  timeAppendLongStrings: ~700ns -> ~200ns
  timeAppendStringAndInt: ~220ns -> ~140ns
  timeAppendStrings: ~200ns -> 130ns

Bug: 19575890
Test: 697-checker-string-append
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: vogar --benchmark art/benchmark/stringbuilder-append/src/StringBuilderAppendBenchmark.java
Change-Id: I51789bf299f5219f68ada4c077b6a1d3fe083964
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2bbb570..3b5699b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -64,6 +64,7 @@
 #include "ssa_liveness_analysis.h"
 #include "stack_map.h"
 #include "stack_map_stream.h"
+#include "string_builder_append.h"
 #include "thread-current-inl.h"
 #include "utils/assembler.h"
 
@@ -599,6 +600,57 @@
   InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
 }
 
+void CodeGenerator::CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction,
+                                                       Location out) {
+  ArenaAllocator* allocator = GetGraph()->GetAllocator();
+  LocationSummary* locations =
+      new (allocator) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  locations->SetOut(out);
+  instruction->GetLocations()->SetInAt(instruction->FormatIndex(),
+                                       Location::ConstantLocation(instruction->GetFormat()));
+
+  uint32_t format = static_cast<uint32_t>(instruction->GetFormat()->GetValue());
+  uint32_t f = format;
+  PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  size_t stack_offset = static_cast<size_t>(pointer_size);  // Start after the ArtMethod*.
+  for (size_t i = 0, num_args = instruction->GetNumberOfArguments(); i != num_args; ++i) {
+    StringBuilderAppend::Argument arg_type =
+        static_cast<StringBuilderAppend::Argument>(f & StringBuilderAppend::kArgMask);
+    switch (arg_type) {
+      case StringBuilderAppend::Argument::kStringBuilder:
+      case StringBuilderAppend::Argument::kString:
+      case StringBuilderAppend::Argument::kCharArray:
+        static_assert(sizeof(StackReference<mirror::Object>) == sizeof(uint32_t), "Size check.");
+        FALLTHROUGH_INTENDED;
+      case StringBuilderAppend::Argument::kBoolean:
+      case StringBuilderAppend::Argument::kChar:
+      case StringBuilderAppend::Argument::kInt:
+      case StringBuilderAppend::Argument::kFloat:
+        locations->SetInAt(i, Location::StackSlot(stack_offset));
+        break;
+      case StringBuilderAppend::Argument::kLong:
+      case StringBuilderAppend::Argument::kDouble:
+        stack_offset = RoundUp(stack_offset, sizeof(uint64_t));
+        locations->SetInAt(i, Location::DoubleStackSlot(stack_offset));
+        // Skip the low word, let the common code skip the high word.
+        stack_offset += sizeof(uint32_t);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected arg format: 0x" << std::hex
+            << (f & StringBuilderAppend::kArgMask) << " full format: 0x" << format;
+        UNREACHABLE();
+    }
+    f >>= StringBuilderAppend::kBitsPerArg;
+    stack_offset += sizeof(uint32_t);
+  }
+  DCHECK_EQ(f, 0u);
+
+  size_t param_size = stack_offset - static_cast<size_t>(pointer_size);
+  DCHECK_ALIGNED(param_size, kVRegSize);
+  size_t num_vregs = param_size / kVRegSize;
+  graph_->UpdateMaximumNumberOfOutVRegs(num_vregs);
+}
+
 void CodeGenerator::CreateUnresolvedFieldLocationSummary(
     HInstruction* field_access,
     DataType::Type field_type,