Merge "Do not attempt to unregister null oat files"
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2897006..6a743eb 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -41,6 +41,7 @@
 #include "driver/dex_compilation_unit.h"
 #include "gc_map_builder.h"
 #include "graph_visualizer.h"
+#include "intrinsics.h"
 #include "leb128.h"
 #include "mapping_table.h"
 #include "mirror/array-inl.h"
@@ -1381,4 +1382,57 @@
   }
 }
 
+void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // We only support backward copying if source and destination are the same.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
 }  // namespace art
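
The helper above bails out only on conditions it can prove from compile-time constants; everything else still gets the kCallOnSlowPath location summary. Below is a minimal sketch of those bail-out rules in isolation (illustrative only, not ART code; ShouldSkipSystemArrayCopyIntrinsic is a made-up name, null pointers stand in for "not a constant", and the two flags stand in for the SystemArrayCopyOptimizations queries).

#include <cstdint>
#include <cstdio>

// Returns true when the intrinsic should be skipped and the plain runtime call emitted.
bool ShouldSkipSystemArrayCopyIntrinsic(const int32_t* src_pos,      // nullptr if not constant
                                        const int32_t* dest_pos,     // nullptr if not constant
                                        const int32_t* length,       // nullptr if not constant
                                        bool dest_is_src,
                                        bool either_is_primitive_array) {
  if ((src_pos != nullptr && *src_pos < 0) ||
      (dest_pos != nullptr && *dest_pos < 0)) {
    return true;  // A negative position always throws, so the runtime call is unavoidable.
  }
  if (length != nullptr && *length < 0) {
    return true;  // A negative length always throws as well.
  }
  if (dest_is_src && src_pos != nullptr && dest_pos != nullptr && *src_pos < *dest_pos) {
    return true;  // Same array with dest_pos > src_pos would need a backward element walk.
  }
  if (either_is_primitive_array) {
    return true;  // Primitive copies are not intrinsified by this path.
  }
  return false;
}

int main() {
  int32_t neg = -1, pos = 2, five = 5;
  printf("%d\n", ShouldSkipSystemArrayCopyIntrinsic(&neg, nullptr, nullptr, false, false));  // 1: negative src_pos
  printf("%d\n", ShouldSkipSystemArrayCopyIntrinsic(&pos, &five, &five, true, false));       // 1: dest_pos > src_pos on the same array
  printf("%d\n", ShouldSkipSystemArrayCopyIntrinsic(nullptr, nullptr, &five, false, false)); // 0: nothing provably wrong
}
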
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index acce5b3..b04dfc0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -421,6 +421,8 @@
                                              Location runtime_type_index_location,
                                              Location runtime_return_location);
 
+  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
+
   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d172fba..8c1820b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1645,6 +1645,7 @@
   DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+                                         codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -1684,6 +1685,7 @@
 
 void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+                                         codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -3512,6 +3514,47 @@
   }
 }
 
+Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant,
+                                                             Opcode opcode) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+  if (constant->IsConstant() &&
+      CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+  return Location::RequiresRegister();
+}
+
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst,
+                                                       Opcode opcode) {
+  uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
+  if (Primitive::Is64BitType(input_cst->GetType())) {
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
+        CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+  } else {
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
+  }
+}
+
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+  ShifterOperand so;
+  ArmAssembler* assembler = codegen_->GetAssembler();
+  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+    return true;
+  }
+  Opcode neg_opcode = kNoOperand;
+  switch (opcode) {
+    case AND:
+      neg_opcode = BIC;
+      break;
+    case ORR:
+      neg_opcode = ORN;
+      break;
+    default:
+      return false;
+  }
+  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+}
+
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
                                                  const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
@@ -4912,17 +4955,18 @@
       nullptr);
 }
 
-void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
-void LocationsBuilderARM::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
-void LocationsBuilderARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
+void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
+void LocationsBuilderARM::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction, ORR); }
+void LocationsBuilderARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction, EOR); }
 
-void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
+void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
          || instruction->GetResultType() == Primitive::kPrimLong);
+  // Note: GVN reorders commutative operations to have the constant on the right hand side.
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
@@ -4938,48 +4982,131 @@
   HandleBitwiseOperation(instruction);
 }
 
+void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
+  // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
+  if (value == 0xffffffffu) {
+    if (out != first) {
+      __ mov(out, ShifterOperand(first));
+    }
+    return;
+  }
+  if (value == 0u) {
+    __ mov(out, ShifterOperand(0));
+    return;
+  }
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) {
+    __ and_(out, first, so);
+  } else {
+    DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so));
+    __ bic(out, first, ShifterOperand(~value));
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateOrrConst(Register out, Register first, uint32_t value) {
+  // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
+  if (value == 0u) {
+    if (out != first) {
+      __ mov(out, ShifterOperand(first));
+    }
+    return;
+  }
+  if (value == 0xffffffffu) {
+    __ mvn(out, ShifterOperand(0));
+    return;
+  }
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, ORR, value, &so)) {
+    __ orr(out, first, so);
+  } else {
+    DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, ORN, ~value, &so));
+    __ orn(out, first, ShifterOperand(~value));
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first, uint32_t value) {
+  // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
+  if (value == 0u) {
+    if (out != first) {
+      __ mov(out, ShifterOperand(first));
+    }
+    return;
+  }
+  __ eor(out, first, ShifterOperand(value));
+}
+
 void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (second.IsConstant()) {
+    uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+    uint32_t value_low = Low32Bits(value);
+    if (instruction->GetResultType() == Primitive::kPrimInt) {
+      Register first_reg = first.AsRegister<Register>();
+      Register out_reg = out.AsRegister<Register>();
+      if (instruction->IsAnd()) {
+        GenerateAndConst(out_reg, first_reg, value_low);
+      } else if (instruction->IsOr()) {
+        GenerateOrrConst(out_reg, first_reg, value_low);
+      } else {
+        DCHECK(instruction->IsXor());
+        GenerateEorConst(out_reg, first_reg, value_low);
+      }
+    } else {
+      DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+      uint32_t value_high = High32Bits(value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
+      Register out_low = out.AsRegisterPairLow<Register>();
+      Register out_high = out.AsRegisterPairHigh<Register>();
+      if (instruction->IsAnd()) {
+        GenerateAndConst(out_low, first_low, value_low);
+        GenerateAndConst(out_high, first_high, value_high);
+      } else if (instruction->IsOr()) {
+        GenerateOrrConst(out_low, first_low, value_low);
+        GenerateOrrConst(out_high, first_high, value_high);
+      } else {
+        DCHECK(instruction->IsXor());
+        GenerateEorConst(out_low, first_low, value_low);
+        GenerateEorConst(out_high, first_high, value_high);
+      }
+    }
+    return;
+  }
 
   if (instruction->GetResultType() == Primitive::kPrimInt) {
-    Register first = locations->InAt(0).AsRegister<Register>();
-    Register second = locations->InAt(1).AsRegister<Register>();
-    Register out = locations->Out().AsRegister<Register>();
+    Register first_reg = first.AsRegister<Register>();
+    ShifterOperand second_reg(second.AsRegister<Register>());
+    Register out_reg = out.AsRegister<Register>();
     if (instruction->IsAnd()) {
-      __ and_(out, first, ShifterOperand(second));
+      __ and_(out_reg, first_reg, second_reg);
     } else if (instruction->IsOr()) {
-      __ orr(out, first, ShifterOperand(second));
+      __ orr(out_reg, first_reg, second_reg);
     } else {
       DCHECK(instruction->IsXor());
-      __ eor(out, first, ShifterOperand(second));
+      __ eor(out_reg, first_reg, second_reg);
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
-    Location first = locations->InAt(0);
-    Location second = locations->InAt(1);
-    Location out = locations->Out();
+    Register first_low = first.AsRegisterPairLow<Register>();
+    Register first_high = first.AsRegisterPairHigh<Register>();
+    ShifterOperand second_low(second.AsRegisterPairLow<Register>());
+    ShifterOperand second_high(second.AsRegisterPairHigh<Register>());
+    Register out_low = out.AsRegisterPairLow<Register>();
+    Register out_high = out.AsRegisterPairHigh<Register>();
     if (instruction->IsAnd()) {
-      __ and_(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ and_(out.AsRegisterPairHigh<Register>(),
-              first.AsRegisterPairHigh<Register>(),
-              ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      __ and_(out_low, first_low, second_low);
+      __ and_(out_high, first_high, second_high);
     } else if (instruction->IsOr()) {
-      __ orr(out.AsRegisterPairLow<Register>(),
-             first.AsRegisterPairLow<Register>(),
-             ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ orr(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      __ orr(out_low, first_low, second_low);
+      __ orr(out_high, first_high, second_high);
     } else {
       DCHECK(instruction->IsXor());
-      __ eor(out.AsRegisterPairLow<Register>(),
-             first.AsRegisterPairLow<Register>(),
-             ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ eor(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      __ eor(out_low, first_low, second_low);
+      __ eor(out_high, first_high, second_high);
     }
   }
 }
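
The constant-operand support added above rests on two facts: only certain 32-bit values fit the Thumb-2 modified-immediate encoding, and for AND/ORR an unencodable value often has an encodable complement, in which case BIC/ORN with ~value computes the same result. The following standalone sketch re-derives that decision for the AND case (illustrative only; IsThumb2ModifiedImmediate and SelectAndForm are made-up names, and the predicate is a simplified stand-in for the assembler's ModifiedImmediate()/ShifterOperandCanHold() checks).

#include <cstdint>
#include <cstdio>

// Simplified Thumb-2 modified-immediate test: an 8-bit byte replicated in one of the
// fixed patterns, or 8 consecutive bits whose top bit is the value's leading one.
bool IsThumb2ModifiedImmediate(uint32_t value) {
  uint32_t b0 = value & 0xffu;
  uint32_t b1 = (value >> 8) & 0xffu;
  if (value <= 0xffu) return true;                          // 0x000000ab
  if (value == (b0 | (b0 << 16))) return true;              // 0x00ab00ab
  if (value == ((b1 << 8) | (b1 << 24))) return true;       // 0xab00ab00
  if (value == b0 * 0x01010101u) return true;               // 0xabababab
  int lz = __builtin_clz(value);                            // value != 0 past this point
  return lz <= 23 && (value & ~(0xff000000u >> lz)) == 0;   // rotated 8-bit value
}

// Mirrors GenerateAndConst: prefer AND #value, fall back to BIC #~value,
// since x & value == x & ~(~value).
const char* SelectAndForm(uint32_t value) {
  if (IsThumb2ModifiedImmediate(value)) return "and rd, rn, #value";
  if (IsThumb2ModifiedImmediate(~value)) return "bic rd, rn, #~value";
  return "load value into a register first";
}

int main() {
  printf("0x000000ff -> %s\n", SelectAndForm(0x000000ffu));  // plain 8-bit immediate
  printf("0xffffff00 -> %s\n", SelectAndForm(0xffffff00u));  // complement (0xff) encodes
  printf("0x12345678 -> %s\n", SelectAndForm(0x12345678u));  // neither form encodes
}
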
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 16d1d38..6900933 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -169,11 +169,15 @@
 
  private:
   void HandleInvoke(HInvoke* invoke);
-  void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
 
@@ -205,6 +209,9 @@
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+  void GenerateAndConst(Register out, Register first, uint32_t value);
+  void GenerateOrrConst(Register out, Register first, uint32_t value);
+  void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 5e8f9e7..7799437 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -117,7 +117,7 @@
     return Location::RegisterLocation(A0);
   }
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return Location::RegisterLocation(A0);
+    return Location::RegisterLocation(V0);
   }
   Location GetSetValueLocation(
       Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f8be21a..b60eebf 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -521,7 +521,8 @@
       move_resolver_(graph->GetArena(), this),
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -5669,6 +5670,51 @@
   }
 }
 
+void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+
+  // Constant area pointer.
+  locations->SetInAt(1, Location::RequiresRegister());
+
+  // And the temporary we need.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Optimizing places the jump table in the constant area.
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  Register constant_area = locations->InAt(1).AsRegister<Register>();
+
+  // Remove the bias, if needed.
+  if (lower_bound != 0) {
+    __ leal(temp_reg, Address(value_reg, -lower_bound));
+    value_reg = temp_reg;
+  }
+
+  // Is the value in range?
+  DCHECK_GE(num_entries, 1);
+  __ cmpl(value_reg, Immediate(num_entries - 1));
+  __ j(kAbove, codegen_->GetLabelOf(default_block));
+
+  // We are in the range of the table.
+  // Load (target-constant_area) from the jump table, indexing by the value.
+  __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
+
+  // Compute the actual target address by adding in constant_area.
+  __ addl(temp_reg, constant_area);
+
+  // And jump.
+  __ jmp(temp_reg);
+}
+
 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
     HX86ComputeBaseMethodAddress* insn) {
   LocationSummary* locations =
@@ -5752,28 +5798,18 @@
   }
 }
 
-void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
-  // Generate the constant area if needed.
-  X86Assembler* assembler = GetAssembler();
-  if (!assembler->IsConstantAreaEmpty()) {
-    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
-    // byte values.
-    assembler->Align(4, 0);
-    constant_area_start_ = assembler->CodeSize();
-    assembler->AddConstantArea();
-  }
-
-  // And finish up.
-  CodeGenerator::Finalize(allocator);
-}
-
 /**
  * Class to handle late fixup of offsets into constant area.
  */
 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
  public:
-  RIPFixup(const CodeGeneratorX86& codegen, int offset)
-      : codegen_(codegen), offset_into_constant_area_(offset) {}
+  RIPFixup(CodeGeneratorX86& codegen, size_t offset)
+      : codegen_(&codegen), offset_into_constant_area_(offset) {}
+
+ protected:
+  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
+
+  CodeGeneratorX86* codegen_;
 
  private:
   void Process(const MemoryRegion& region, int pos) OVERRIDE {
@@ -5781,19 +5817,77 @@
     // last 4 bytes of the instruction.
     // The value to patch is the distance from the offset in the constant area
     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
-    int32_t constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
-    int32_t relative_position = constant_offset - codegen_.GetMethodAddressOffset();;
+    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
+    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();
 
     // Patch in the right value.
     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
   }
 
-  const CodeGeneratorX86& codegen_;
-
   // Location in constant area that the fixup refers to.
-  int offset_into_constant_area_;
+  int32_t offset_into_constant_area_;
 };
 
+/**
+ * Class to handle late fixup of offsets to a jump table that will be created in the
+ * constant area.
+ */
+class JumpTableRIPFixup : public RIPFixup {
+ public:
+  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
+      : RIPFixup(codegen, static_cast<size_t>(-1)), switch_instr_(switch_instr) {}
+
+  void CreateJumpTable() {
+    X86Assembler* assembler = codegen_->GetAssembler();
+
+    // Ensure that the reference to the jump table has the correct offset.
+    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
+    SetOffset(offset_in_constant_table);
+
+    // The label values in the jump table are computed relative to the
+    // instruction addressing the constant area.
+    const int32_t relative_offset = codegen_->GetMethodAddressOffset();
+
+    // Populate the jump table with the correct target offsets.
+    int32_t num_entries = switch_instr_->GetNumEntries();
+    HBasicBlock* block = switch_instr_->GetBlock();
+    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
+    // The value that we want is the target offset - the method address offset.
+    for (int32_t i = 0; i < num_entries; i++) {
+      HBasicBlock* b = successors[i];
+      Label* l = codegen_->GetLabelOf(b);
+      DCHECK(l->IsBound());
+      int32_t offset_to_block = l->Position() - relative_offset;
+      assembler->AppendInt32(offset_to_block);
+    }
+  }
+
+ private:
+  const HX86PackedSwitch* switch_instr_;
+};
+
+void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
+  // Generate the constant area if needed.
+  X86Assembler* assembler = GetAssembler();
+  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+    // byte values.
+    assembler->Align(4, 0);
+    constant_area_start_ = assembler->CodeSize();
+
+    // Populate any jump tables.
+    for (auto jump_table : fixups_to_jump_tables_) {
+      jump_table->CreateJumpTable();
+    }
+
+    // And now add the constant area to the generated code.
+    assembler->AddConstantArea();
+  }
+
+  // And finish up.
+  CodeGenerator::Finalize(allocator);
+}
+
 Address CodeGeneratorX86::LiteralDoubleAddress(double v, Register reg) {
   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   return Address(reg, kDummy32BitOffset, fixup);
@@ -5814,6 +5908,20 @@
   return Address(reg, kDummy32BitOffset, fixup);
 }
 
+Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
+                                           Register reg,
+                                           Register value) {
+  // Create a fixup to be used to create and address the jump table.
+  JumpTableRIPFixup* table_fixup =
+      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
+
+  // We have to populate the jump tables.
+  fixups_to_jump_tables_.push_back(table_fixup);
+
+  // We want a scaled address, as we are extracting the correct offset from the table.
+  return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
+}
+
 /**
  * Finds instructions that need the constant area base as an input.
  */
@@ -5864,6 +5972,21 @@
     }
   }
 
+  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
+    // address the constant area.
+    InitializeConstantAreaPointer(switch_insn);
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = switch_insn->GetBlock();
+    HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
+        switch_insn->GetStartValue(),
+        switch_insn->GetNumEntries(),
+        switch_insn->InputAt(0),
+        base_,
+        switch_insn->GetDexPc());
+    block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
+  }
+
   void InitializeConstantAreaPointer(HInstruction* user) {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
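
One detail of the X86PackedSwitch lowering above is easy to miss: the range test is a single unsigned comparison. After the leal subtracts the bias, any input below lower_bound wraps around to a large unsigned index and is rejected by the same j(kAbove) that rejects indices past the last entry. A small sketch of that check (illustrative only, not ART code; InSwitchRange is a made-up helper):

#include <cstdint>
#include <cstdio>

// Sketch of the bounds check emitted by VisitX86PackedSwitch.
bool InSwitchRange(int32_t value, int32_t lower_bound, int32_t num_entries) {
  uint32_t index = static_cast<uint32_t>(value - lower_bound);    // leal temp, [value - lower_bound]
  return index <= static_cast<uint32_t>(num_entries - 1);         // cmpl temp, num_entries - 1; j(kAbove) -> default
}

int main() {
  printf("%d\n", InSwitchRange(7, 5, 4));   // 1: case 7 lies within [5, 8]
  printf("%d\n", InSwitchRange(3, 5, 4));   // 0: below the bias, wraps to a huge unsigned index
  printf("%d\n", InSwitchRange(9, 5, 4));   // 0: past the last entry
}
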
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ae2d84f..fdfc5ab 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -245,6 +245,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
 };
 
+class JumpTableRIPFixup;
+
 class CodeGeneratorX86 : public CodeGenerator {
  public:
   CodeGeneratorX86(HGraph* graph,
@@ -385,6 +387,8 @@
   Address LiteralInt32Address(int32_t v, Register reg);
   Address LiteralInt64Address(int64_t v, Register reg);
 
+  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
+
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
  private:
@@ -405,6 +409,9 @@
   // Used for fixups to the constant area.
   int32_t constant_area_start_;
 
+  // Fixups for jump tables that need to be patched after the constant table is generated.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   // If there is a HX86ComputeBaseMethodAddress instruction in the graph
   // (which shall be the sole instruction of this kind), subtracting this offset
   // from the value contained in the out register of this HX86ComputeBaseMethodAddress
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 21120a0..f0d9420 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -670,7 +670,8 @@
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -5322,31 +5323,43 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
   int32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
-  CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+  // Remove the bias, if needed.
+  Register value_reg_out = value_reg_in.AsRegister();
+  if (lower_bound != 0) {
+    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
+    value_reg_out = temp_reg.AsRegister();
+  }
+  CpuRegister value_reg(value_reg_out);
+
+  // Is the value in range?
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+  __ cmpl(value_reg, Immediate(num_entries - 1));
+  __ j(kAbove, codegen_->GetLabelOf(default_block));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
-  }
+  // We are in the range of the table.
+  // Load the address of the jump table in the constant area.
+  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
-  }
+  // Load the (signed) offset from the jump table.
+  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
+
+  // Add the offset to the address of the table base.
+  __ addq(temp_reg, base_reg);
+
+  // And jump.
+  __ jmp(temp_reg);
 }
 
 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
@@ -5372,15 +5385,85 @@
   }
 }
 
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
+ public:
+  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
+      : codegen_(&codegen), offset_into_constant_area_(offset) {}
+
+ protected:
+  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
+
+  CodeGeneratorX86_64* codegen_;
+
+ private:
+  void Process(const MemoryRegion& region, int pos) OVERRIDE {
+    // Patch the correct offset for the instruction.  We use the address of the
+    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
+    int32_t relative_position = constant_offset - pos;
+
+    // Patch in the right value.
+    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+  }
+
+  // Location in constant area that the fixup refers to.
+  size_t offset_into_constant_area_;
+};
+
+/**
+ * Class to handle late fixup of offsets to a jump table that will be created in the
+ * constant area.
+ */
+class JumpTableRIPFixup : public RIPFixup {
+ public:
+  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
+      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
+
+  void CreateJumpTable() {
+    X86_64Assembler* assembler = codegen_->GetAssembler();
+
+    // Ensure that the reference to the jump table has the correct offset.
+    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
+    SetOffset(offset_in_constant_table);
+
+    // Compute the offset from the start of the function to this jump table.
+    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
+
+    // Populate the jump table with the correct target offsets.
+    int32_t num_entries = switch_instr_->GetNumEntries();
+    HBasicBlock* block = switch_instr_->GetBlock();
+    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
+    // The value that we want is the target offset - the position of the table.
+    for (int32_t i = 0; i < num_entries; i++) {
+      HBasicBlock* b = successors[i];
+      Label* l = codegen_->GetLabelOf(b);
+      DCHECK(l->IsBound());
+      int32_t offset_to_block = l->Position() - current_table_offset;
+      assembler->AppendInt32(offset_to_block);
+    }
+  }
+
+ private:
+  const HPackedSwitch* switch_instr_;
+};
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();
-  if (!assembler->IsConstantAreaEmpty()) {
-    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
-    // byte values.  If used for vectors at a later time, this will need to be
-    // updated to 16 bytes with the appropriate offset.
+  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
     assembler->Align(4, 0);
     constant_area_start_ = assembler->CodeSize();
+
+    // Populate any jump tables.
+    for (auto jump_table : fixups_to_jump_tables_) {
+      jump_table->CreateJumpTable();
+    }
+
+    // And now add the constant area to the generated code.
     assembler->AddConstantArea();
   }
 
@@ -5388,31 +5471,6 @@
   CodeGenerator::Finalize(allocator);
 }
 
-/**
- * Class to handle late fixup of offsets into constant area.
- */
-class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
-  public:
-    RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
-      : codegen_(codegen), offset_into_constant_area_(offset) {}
-
-  private:
-    void Process(const MemoryRegion& region, int pos) OVERRIDE {
-      // Patch the correct offset for the instruction.  We use the address of the
-      // 'next' instruction, which is 'pos' (patch the 4 bytes before).
-      int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
-      int relative_position = constant_offset - pos;
-
-      // Patch in the right value.
-      region.StoreUnaligned<int32_t>(pos - 4, relative_position);
-    }
-
-    const CodeGeneratorX86_64& codegen_;
-
-    // Location in constant area that the fixup refers to.
-    int offset_into_constant_area_;
-};
-
 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   return Address::RIP(fixup);
@@ -5453,6 +5511,16 @@
   GetMoveResolver()->EmitNativeCode(&parallel_move);
 }
 
+Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
+  // Create a fixup to be used to create and address the jump table.
+  JumpTableRIPFixup* table_fixup =
+      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
+
+  // We have to populate the jump tables.
+  fixups_to_jump_tables_.push_back(table_fixup);
+  return Address::RIP(table_fixup);
+}
+
 #undef __
 
 }  // namespace x86_64
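
Unlike the x86 table, the x86-64 table is self-relative: CreateJumpTable() stores label_position - table_position for every case block, and the generated leaq/movsxd/addq/jmp sequence adds the table address back at run time. A toy round trip with made-up code positions (illustrative only, not ART code):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Assumed positions: where the table will start and where each bound case label is.
  const int32_t table_position = 0x80;
  const int32_t case_positions[] = {0x10, 0x24, 0x3c};

  // CreateJumpTable(): append target-minus-table offsets.
  std::vector<int32_t> table;
  for (int32_t target : case_positions) {
    table.push_back(target - table_position);            // assembler->AppendInt32(offset_to_block)
  }

  // VisitPackedSwitch() at run time: base + sign-extended entry recovers the target.
  for (size_t i = 0; i < table.size(); ++i) {
    int32_t target = table_position + table[i];          // addq temp, base after the movsxd
    printf("case %zu jumps to position 0x%x\n", i, static_cast<unsigned>(target));
  }
  return 0;
}
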
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d6a6a7e..dc86a48 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -234,6 +234,9 @@
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
 };
 
+// Class for fixups to jump tables.
+class JumpTableRIPFixup;
+
 class CodeGeneratorX86_64 : public CodeGenerator {
  public:
   CodeGeneratorX86_64(HGraph* graph,
@@ -354,6 +357,7 @@
 
   // Load a 64 bit value into a register in the most efficient manner.
   void Load64BitValue(CpuRegister dest, int64_t value);
+  Address LiteralCaseTable(HPackedSwitch* switch_instr);
 
   // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
   void Store64BitValueToStack(Location dest, int64_t value);
@@ -391,6 +395,9 @@
   // We will fix this up in the linker later to have the right value.
   static constexpr int32_t kDummy32BitOffset = 256;
 
+  // Fixups for jump tables need to be handled specially.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 2793793..58e479a 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1307,6 +1307,308 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  LocationSummary* locations = invoke->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
+  if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
+    locations->SetInAt(3, Location::RequiresRegister());
+  }
+  if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+}
+
+static void CheckPosition(ArmAssembler* assembler,
+                          Location pos,
+                          Register input,
+                          Location length,
+                          SlowPathCode* slow_path,
+                          Register input_len,
+                          Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+        if (length.IsConstant()) {
+          __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
+        }
+        __ b(slow_path->GetEntryLabel(), LT);
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ LoadFromOffset(kLoadWord, input_len, input, length_offset);
+      __ subs(temp, input_len, ShifterOperand(pos_const));
+      __ b(slow_path->GetEntryLabel(), LT);
+
+      // Check that (length(input) - pos) >= length.
+      if (length.IsConstant()) {
+        __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
+      }
+      __ b(slow_path->GetEntryLabel(), LT);
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ cmp(pos_reg, ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), LT);
+
+    // Check that pos <= length(input).
+    __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+    __ subs(temp, temp, ShifterOperand(pos_reg));
+    __ b(slow_path->GetEntryLabel(), LT);
+
+    // Check that (length(input) - pos) >= length.
+    if (length.IsConstant()) {
+      __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
+    }
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+}
+
+void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  Register src = locations->InAt(0).AsRegister<Register>();
+  Location src_pos = locations->InAt(1);
+  Register dest = locations->InAt(2).AsRegister<Register>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  Label ok;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (!optimizations.GetDestinationIsSource()) {
+    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
+      __ cmp(src, ShifterOperand(dest));
+    }
+  }
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ b(&ok, NE);
+      }
+      __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
+      __ b(slow_path->GetEntryLabel(), GT);
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ b(&ok, NE);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos_constant));
+    } else {
+      __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
+    }
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+    __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or two of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non primitive array.
+      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non primitive array.
+      __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    }
+
+    __ cmp(temp1, ShifterOperand(temp2));
+
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
+      Label do_copy;
+      __ b(&do_copy, EQ);
+      if (!did_unpoison) {
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
+      __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+      // No need to unpoison the result, we're comparing against null.
+      __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+      __ Bind(&do_copy);
+    } else {
+      __ b(slow_path->GetEntryLabel(), NE);
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+    __ MaybeUnpoisonHeapReference(temp1);
+    __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+    __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+    __ MaybeUnpoisonHeapReference(temp3);
+    __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+    static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+    __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+  }
+
+  // Compute base source address, base destination address, and end source address.
+
+  uint32_t element_size = sizeof(int32_t);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(temp1, src, element_size * constant + offset);
+  } else {
+    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+    __ AddConstant(temp1, offset);
+  }
+
+  if (dest_pos.IsConstant()) {
+    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(temp2, dest, element_size * constant + offset);
+  } else {
+    __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
+    __ AddConstant(temp2, offset);
+  }
+
+  if (length.IsConstant()) {
+    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(temp3, temp1, element_size * constant);
+  } else {
+    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+  }
+
+  // Iterate over the arrays and do a raw copy of the objects. We don't need to
+  // poison/unpoison, nor do any read barrier as the next uses of the destination
+  // array will do it.
+  Label loop, done;
+  __ cmp(temp1, ShifterOperand(temp3));
+  __ b(&done, EQ);
+  __ Bind(&loop);
+  __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+  __ str(IP, Address(temp2, element_size, Address::PostIndex));
+  __ cmp(temp1, ShifterOperand(temp3));
+  __ b(&loop, NE);
+  __ Bind(&done);
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       Register(kNoRegister),
+                       false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -1333,7 +1635,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 2abb605..127e9a4 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -33,8 +33,10 @@
 
 class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor {
  public:
-  IntrinsicLocationsBuilderARM(ArenaAllocator* arena, const ArmInstructionSetFeatures& features)
-      : arena_(arena), features_(features) {}
+  IntrinsicLocationsBuilderARM(ArenaAllocator* arena,
+                               ArmAssembler* assembler,
+                               const ArmInstructionSetFeatures& features)
+      : arena_(arena), assembler_(assembler), features_(features) {}
 
   // Define visitor methods.
 
@@ -52,6 +54,7 @@
 
  private:
   ArenaAllocator* arena_;
+  ArmAssembler* assembler_;
 
   const ArmInstructionSetFeatures& features_;
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1061aae..e0d88a9 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -914,55 +914,7 @@
 
 
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
-  // Check to see if we have known failures that will cause us to have to bail out
-  // to the runtime, and just generate the runtime call directly.
-  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
-  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
-
-  // The positions must be non-negative.
-  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
-      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
-    // We will have to fail anyways.
-    return;
-  }
-
-  // The length must be > 0.
-  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
-  if (length != nullptr) {
-    int32_t len = length->GetValue();
-    if (len < 0) {
-      // Just call as normal.
-      return;
-    }
-  }
-
-  SystemArrayCopyOptimizations optimizations(invoke);
-
-  if (optimizations.GetDestinationIsSource()) {
-    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
-      // We only support backward copying if source and destination are the same.
-      return;
-    }
-  }
-
-  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
-    // We currently don't intrinsify primitive copying.
-    return;
-  }
-
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
-  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
-
-  locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
@@ -990,7 +942,9 @@
   SystemArrayCopyOptimizations optimizations(invoke);
 
   if (!optimizations.GetDestinationIsSource()) {
-    __ cmpl(src, dest);
+    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
+      __ cmpl(src, dest);
+    }
   }
 
   // If source and destination are the same, we go to slow path if we need to do
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 24a89bc..ed401b6 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -606,8 +606,23 @@
 void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                                   HInstruction* replacement) {
   DCHECK(initial->GetBlock() == this);
-  InsertInstructionBefore(replacement, initial);
-  initial->ReplaceWith(replacement);
+  if (initial->IsControlFlow()) {
+    // We can only replace a control flow instruction with another control flow instruction.
+    DCHECK(replacement->IsControlFlow());
+    DCHECK_EQ(replacement->GetId(), -1);
+    DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid);
+    DCHECK_EQ(initial->GetBlock(), this);
+    DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid);
+    DCHECK(initial->GetUses().IsEmpty());
+    DCHECK(initial->GetEnvUses().IsEmpty());
+    replacement->SetBlock(this);
+    replacement->SetId(GetGraph()->GetNextInstructionId());
+    instructions_.InsertInstructionBefore(replacement, initial);
+    UpdateInputsUsers(replacement);
+  } else {
+    InsertInstructionBefore(replacement, initial);
+    initial->ReplaceWith(replacement);
+  }
   RemoveInstruction(initial);
 }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 82909c4..0d668e8 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1074,7 +1074,8 @@
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                            \
   M(X86ComputeBaseMethodAddress, Instruction)                           \
-  M(X86LoadFromConstantTable, Instruction)
+  M(X86LoadFromConstantTable, Instruction)                              \
+  M(X86PackedSwitch, Instruction)
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index f7cc872..556217b 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -62,6 +62,45 @@
   DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
 };
 
+// X86 version of HPackedSwitch that holds a pointer to the base method address.
+class HX86PackedSwitch : public HTemplateInstruction<2> {
+ public:
+  HX86PackedSwitch(int32_t start_value,
+                   int32_t num_entries,
+                   HInstruction* input,
+                   HX86ComputeBaseMethodAddress* method_base,
+                   uint32_t dex_pc)
+    : HTemplateInstruction(SideEffects::None(), dex_pc),
+      start_value_(start_value),
+      num_entries_(num_entries) {
+    SetRawInputAt(0, input);
+    SetRawInputAt(1, method_base);
+  }
+
+  bool IsControlFlow() const OVERRIDE { return true; }
+
+  int32_t GetStartValue() const { return start_value_; }
+
+  int32_t GetNumEntries() const { return num_entries_; }
+
+  HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const {
+    return InputAt(1)->AsX86ComputeBaseMethodAddress();
+  }
+
+  HBasicBlock* GetDefaultBlock() const {
+    // Last entry is the default block.
+    return GetBlock()->GetSuccessors()[num_entries_];
+  }
+
+  DECLARE_INSTRUCTION(X86PackedSwitch);
+
+ private:
+  const int32_t start_value_;
+  const int32_t num_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(HX86PackedSwitch);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_X86_H_
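
HX86PackedSwitch keeps the successor layout that GetDefaultBlock() relies on: the owning block has num_entries + 1 successors, where successor i handles case start_value + i and the final successor is the default. A tiny sketch of that convention (hypothetical block names, not ART code):

#include <cstdio>

int main() {
  const int num_entries = 3;
  const char* successors[] = {"case_0", "case_1", "case_2", "default_block"};  // num_entries + 1 successors
  for (int i = 0; i < num_entries; ++i) {
    printf("value %d -> %s\n", i, successors[i]);            // in-range values use successor i
  }
  printf("out of range -> %s\n", successors[num_entries]);   // GetDefaultBlock()
  return 0;
}
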
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 967b191..d59bc6b 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -470,6 +470,13 @@
     orr(rd, rn, so, cond, kCcSet);
   }
 
+  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void orns(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    orn(rd, rn, so, cond, kCcSet);
+  }
+
   virtual void mov(Register rd, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
@@ -832,6 +839,8 @@
                                      uint32_t immediate,
                                      ShifterOperand* shifter_op) = 0;
 
+  virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0;
+
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
   virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
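Note: orn computes rd = rn | ~operand (orns additionally sets the flags), and ShifterOperandCanAlwaysHold() reports whether an immediate fits the modified-immediate encoding regardless of opcode. Together they let a code generator choose between ORR and ORN for an OR with a constant. A hedged sketch, assuming Thumb2 and an ArmAssembler* named assembler_; GenerateOrImmediate is not part of this patch:

    // Illustrative fragment, not part of this patch.
    void GenerateOrImmediate(Register out, Register in, uint32_t imm) {
      if (assembler_->ShifterOperandCanAlwaysHold(imm)) {
        assembler_->orr(out, in, ShifterOperand(imm));    // out = in | imm
      } else if (assembler_->ShifterOperandCanAlwaysHold(~imm)) {
        assembler_->orn(out, in, ShifterOperand(~imm));   // out = in | ~(~imm) = in | imm
      } else {
        // Materialize imm in a register and use the register form of orr.
        // (On ARM32 the orn path is unavailable; see assembler_arm32.cc below.)
      }
    }
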
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index f7772ae..6e7c828 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -48,6 +48,11 @@
   return false;
 }
 
+bool Arm32Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
+  ShifterOperand shifter_op;
+  return ShifterOperandCanHoldArm32(immediate, &shifter_op);
+}
+
 bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
                                            Register rn ATTRIBUTE_UNUSED,
                                            Opcode opcode ATTRIBUTE_UNUSED,
@@ -130,6 +135,15 @@
 }
 
 
+void Arm32Assembler::orn(Register rd ATTRIBUTE_UNUSED,
+                         Register rn ATTRIBUTE_UNUSED,
+                         const ShifterOperand& so ATTRIBUTE_UNUSED,
+                         Condition cond ATTRIBUTE_UNUSED,
+                         SetCc set_cc ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "orn is not supported on ARM32";
+}
+
+
 void Arm32Assembler::mov(Register rd, const ShifterOperand& so,
                          Condition cond, SetCc set_cc) {
   EmitType01(cond, so.type(), MOV, set_cc, R0, rd, so);
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 3407369..4646538 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -74,6 +74,9 @@
   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+
   virtual void mov(Register rd, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
@@ -294,6 +297,7 @@
                              uint32_t immediate,
                              ShifterOperand* shifter_op) OVERRIDE;
 
+  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 0f6c4f5..cc87856 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -390,6 +390,10 @@
   EmitLiterals();
 }
 
+bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
+  return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+}
+
 bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
                                             Register rn ATTRIBUTE_UNUSED,
                                             Opcode opcode,
@@ -410,6 +414,7 @@
     case MOV:
       // TODO: Support less than or equal to 12 bits.
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+
     case MVN:
     default:
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
@@ -492,6 +497,12 @@
 }
 
 
+void Thumb2Assembler::orn(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ORN, set_cc, rn, rd, so);
+}
+
+
 void Thumb2Assembler::mov(Register rd, const ShifterOperand& so,
                           Condition cond, SetCc set_cc) {
   EmitDataProcessing(cond, MOV, set_cc, R0, rd, so);
@@ -1105,6 +1116,7 @@
       rn_is_valid = false;      // There is no Rn for these instructions.
       break;
     case TEQ:
+    case ORN:
       return true;
     case ADD:
     case SUB:
@@ -1222,6 +1234,7 @@
     case MOV: thumb_opcode =  2U /* 0b0010 */; rn = PC; break;
     case BIC: thumb_opcode =  1U /* 0b0001 */; break;
     case MVN: thumb_opcode =  3U /* 0b0011 */; rn = PC; break;
+    case ORN: thumb_opcode =  3U /* 0b0011 */; break;
     default:
       break;
   }
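Note on the encoding changes above: in the 32-bit Thumb2 data-processing encodings, ORN shares the 0b0011 opcode with MVN; MVN is the special case where Rn is PC, which is why the table forces rn = PC for MVN but keeps the real rn for ORN. ORN is also grouped with TEQ as an operation that only has a 32-bit encoding. The updated test expectations further down show the resulting encodings:

    ea61 0002    orn r0, r1, r2       (register form)
    f061 0055    orn r0, r1, #85      (modified-immediate form)
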
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index a1a8927..055b137 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -98,6 +98,9 @@
   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+
   virtual void mov(Register rd, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
@@ -337,6 +340,8 @@
                              uint32_t immediate,
                              ShifterOperand* shifter_op) OVERRIDE;
 
+  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
+
 
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h
index 6b4daed..2060064 100644
--- a/compiler/utils/arm/constants_arm.h
+++ b/compiler/utils/arm/constants_arm.h
@@ -148,7 +148,8 @@
   MOV = 13,  // Move
   BIC = 14,  // Bit Clear
   MVN = 15,  // Move Not
-  kMaxOperand = 16
+  ORN = 16,  // Logical OR NOT.
+  kMaxOperand = 17
 };
 std::ostream& operator<<(std::ostream& os, const Opcode& rhs);
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index b2a354b..2ae8841 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -238,6 +238,7 @@
   __ sub(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ and_(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ orr(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ orn(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ eor(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ bic(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ adc(R0, R1, ShifterOperand(R2), AL, kCcKeep);
@@ -371,6 +372,7 @@
   __ sub(R0, R1, ShifterOperand(0x55));
   __ and_(R0, R1, ShifterOperand(0x55));
   __ orr(R0, R1, ShifterOperand(0x55));
+  __ orn(R0, R1, ShifterOperand(0x55));
   __ eor(R0, R1, ShifterOperand(0x55));
   __ bic(R0, R1, ShifterOperand(0x55));
   __ adc(R0, R1, ShifterOperand(0x55));
@@ -403,6 +405,7 @@
   __ sub(R0, R1, ShifterOperand(0x550055));
   __ and_(R0, R1, ShifterOperand(0x550055));
   __ orr(R0, R1, ShifterOperand(0x550055));
+  __ orn(R0, R1, ShifterOperand(0x550055));
   __ eor(R0, R1, ShifterOperand(0x550055));
   __ bic(R0, R1, ShifterOperand(0x550055));
   __ adc(R0, R1, ShifterOperand(0x550055));
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 82ad642..b79c2e4 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -23,109 +23,110 @@
   "   8:	eba1 0002 	sub.w	r0, r1, r2\n",
   "   c:	ea01 0002 	and.w	r0, r1, r2\n",
   "  10:	ea41 0002 	orr.w	r0, r1, r2\n",
-  "  14:	ea81 0002 	eor.w	r0, r1, r2\n",
-  "  18:	ea21 0002 	bic.w	r0, r1, r2\n",
-  "  1c:	eb41 0002 	adc.w	r0, r1, r2\n",
-  "  20:	eb61 0002 	sbc.w	r0, r1, r2\n",
-  "  24:	ebc1 0002 	rsb	r0, r1, r2\n",
-  "  28:	ea90 0f01 	teq	r0, r1\n",
-  "  2c:	0008      	movs	r0, r1\n",
-  "  2e:	4608      	mov	r0, r1\n",
-  "  30:	43c8      	mvns	r0, r1\n",
-  "  32:	4408      	add	r0, r1\n",
-  "  34:	1888      	adds	r0, r1, r2\n",
-  "  36:	1a88      	subs	r0, r1, r2\n",
-  "  38:	4148      	adcs	r0, r1\n",
-  "  3a:	4188      	sbcs	r0, r1\n",
-  "  3c:	4008      	ands	r0, r1\n",
-  "  3e:	4308      	orrs	r0, r1\n",
-  "  40:	4048      	eors	r0, r1\n",
-  "  42:	4388      	bics	r0, r1\n",
-  "  44:	4208      	tst	r0, r1\n",
-  "  46:	4288      	cmp	r0, r1\n",
-  "  48:	42c8      	cmn	r0, r1\n",
-  "  4a:	4641		mov	r1, r8\n",
-  "  4c:	4681		mov	r9, r0\n",
-  "  4e:	46c8		mov	r8, r9\n",
-  "  50:	4441		add	r1, r8\n",
-  "  52:	4481		add	r9, r0\n",
-  "  54:	44c8		add	r8, r9\n",
-  "  56:	4548		cmp	r0, r9\n",
-  "  58:	4588		cmp	r8, r1\n",
-  "  5a:	45c1		cmp	r9, r8\n",
-  "  5c:	4248   	   	negs	r0, r1\n",
-  "  5e:	4240   	   	negs	r0, r0\n",
-  "  60:	ea5f 0008  	movs.w	r0, r8\n",
-  "  64:	ea7f 0008  	mvns.w	r0, r8\n",
-  "  68:	eb01 0008 	add.w	r0, r1, r8\n",
-  "  6c:	eb11 0008 	adds.w	r0, r1, r8\n",
-  "  70:	ebb1 0008 	subs.w	r0, r1, r8\n",
-  "  74:	eb50 0008 	adcs.w	r0, r0, r8\n",
-  "  78:	eb70 0008 	sbcs.w	r0, r0, r8\n",
-  "  7c:	ea10 0008 	ands.w	r0, r0, r8\n",
-  "  80:	ea50 0008 	orrs.w	r0, r0, r8\n",
-  "  84:	ea90 0008 	eors.w	r0, r0, r8\n",
-  "  88:	ea30 0008 	bics.w	r0, r0, r8\n",
-  "  8c:	ea10 0f08 	tst.w	r0, r8\n",
-  "  90:	eb10 0f08 	cmn.w	r0, r8\n",
-  "  94:	f1d8 0000 	rsbs	r0, r8, #0\n",
-  "  98:	f1d8 0800 	rsbs	r8, r8, #0\n",
-  "  9c:	bf08       	it	eq\n",
-  "  9e:	ea7f 0001  	mvnseq.w	r0, r1\n",
-  "  a2:	bf08       	it	eq\n",
-  "  a4:	eb11 0002 	addseq.w	r0, r1, r2\n",
-  "  a8:	bf08       	it	eq\n",
-  "  aa:	ebb1 0002 	subseq.w	r0, r1, r2\n",
-  "  ae:	bf08       	it	eq\n",
-  "  b0:	eb50 0001 	adcseq.w	r0, r0, r1\n",
-  "  b4:	bf08       	it	eq\n",
-  "  b6:	eb70 0001 	sbcseq.w	r0, r0, r1\n",
-  "  ba:	bf08       	it	eq\n",
-  "  bc:	ea10 0001 	andseq.w	r0, r0, r1\n",
-  "  c0:	bf08       	it	eq\n",
-  "  c2:	ea50 0001 	orrseq.w	r0, r0, r1\n",
-  "  c6:	bf08       	it	eq\n",
-  "  c8:	ea90 0001 	eorseq.w	r0, r0, r1\n",
-  "  cc:	bf08       	it	eq\n",
-  "  ce:	ea30 0001 	bicseq.w	r0, r0, r1\n",
-  "  d2:	bf08       	it	eq\n",
-  "  d4:	43c8      	mvneq	r0, r1\n",
+  "  14:	ea61 0002 	orn	r0, r1, r2\n",
+  "  18:	ea81 0002 	eor.w	r0, r1, r2\n",
+  "  1c:	ea21 0002 	bic.w	r0, r1, r2\n",
+  "  20:	eb41 0002 	adc.w	r0, r1, r2\n",
+  "  24:	eb61 0002 	sbc.w	r0, r1, r2\n",
+  "  28:	ebc1 0002 	rsb	r0, r1, r2\n",
+  "  2c:	ea90 0f01 	teq	r0, r1\n",
+  "  30:	0008      	movs	r0, r1\n",
+  "  32:	4608      	mov	r0, r1\n",
+  "  34:	43c8      	mvns	r0, r1\n",
+  "  36:	4408      	add	r0, r1\n",
+  "  38:	1888      	adds	r0, r1, r2\n",
+  "  3a:	1a88      	subs	r0, r1, r2\n",
+  "  3c:	4148      	adcs	r0, r1\n",
+  "  3e:	4188      	sbcs	r0, r1\n",
+  "  40:	4008      	ands	r0, r1\n",
+  "  42:	4308      	orrs	r0, r1\n",
+  "  44:	4048      	eors	r0, r1\n",
+  "  46:	4388      	bics	r0, r1\n",
+  "  48:	4208      	tst	r0, r1\n",
+  "  4a:	4288      	cmp	r0, r1\n",
+  "  4c:	42c8      	cmn	r0, r1\n",
+  "  4e:	4641		mov	r1, r8\n",
+  "  50:	4681		mov	r9, r0\n",
+  "  52:	46c8		mov	r8, r9\n",
+  "  54:	4441		add	r1, r8\n",
+  "  56:	4481		add	r9, r0\n",
+  "  58:	44c8		add	r8, r9\n",
+  "  5a:	4548		cmp	r0, r9\n",
+  "  5c:	4588		cmp	r8, r1\n",
+  "  5e:	45c1		cmp	r9, r8\n",
+  "  60:	4248   	   	negs	r0, r1\n",
+  "  62:	4240   	   	negs	r0, r0\n",
+  "  64:	ea5f 0008  	movs.w	r0, r8\n",
+  "  68:	ea7f 0008  	mvns.w	r0, r8\n",
+  "  6c:	eb01 0008 	add.w	r0, r1, r8\n",
+  "  70:	eb11 0008 	adds.w	r0, r1, r8\n",
+  "  74:	ebb1 0008 	subs.w	r0, r1, r8\n",
+  "  78:	eb50 0008 	adcs.w	r0, r0, r8\n",
+  "  7c:	eb70 0008 	sbcs.w	r0, r0, r8\n",
+  "  80:	ea10 0008 	ands.w	r0, r0, r8\n",
+  "  84:	ea50 0008 	orrs.w	r0, r0, r8\n",
+  "  88:	ea90 0008 	eors.w	r0, r0, r8\n",
+  "  8c:	ea30 0008 	bics.w	r0, r0, r8\n",
+  "  90:	ea10 0f08 	tst.w	r0, r8\n",
+  "  94:	eb10 0f08 	cmn.w	r0, r8\n",
+  "  98:	f1d8 0000 	rsbs	r0, r8, #0\n",
+  "  9c:	f1d8 0800 	rsbs	r8, r8, #0\n",
+  "  a0:	bf08       	it	eq\n",
+  "  a2:	ea7f 0001  	mvnseq.w	r0, r1\n",
+  "  a6:	bf08       	it	eq\n",
+  "  a8:	eb11 0002 	addseq.w	r0, r1, r2\n",
+  "  ac:	bf08       	it	eq\n",
+  "  ae:	ebb1 0002 	subseq.w	r0, r1, r2\n",
+  "  b2:	bf08       	it	eq\n",
+  "  b4:	eb50 0001 	adcseq.w	r0, r0, r1\n",
+  "  b8:	bf08       	it	eq\n",
+  "  ba:	eb70 0001 	sbcseq.w	r0, r0, r1\n",
+  "  be:	bf08       	it	eq\n",
+  "  c0:	ea10 0001 	andseq.w	r0, r0, r1\n",
+  "  c4:	bf08       	it	eq\n",
+  "  c6:	ea50 0001 	orrseq.w	r0, r0, r1\n",
+  "  ca:	bf08       	it	eq\n",
+  "  cc:	ea90 0001 	eorseq.w	r0, r0, r1\n",
+  "  d0:	bf08       	it	eq\n",
+  "  d2:	ea30 0001 	bicseq.w	r0, r0, r1\n",
   "  d6:	bf08       	it	eq\n",
-  "  d8:	1888      	addeq	r0, r1, r2\n",
+  "  d8:	43c8      	mvneq	r0, r1\n",
   "  da:	bf08       	it	eq\n",
-  "  dc:	1a88      	subeq	r0, r1, r2\n",
+  "  dc:	1888      	addeq	r0, r1, r2\n",
   "  de:	bf08       	it	eq\n",
-  "  e0:	4148      	adceq	r0, r1\n",
+  "  e0:	1a88      	subeq	r0, r1, r2\n",
   "  e2:	bf08       	it	eq\n",
-  "  e4:	4188      	sbceq	r0, r1\n",
+  "  e4:	4148      	adceq	r0, r1\n",
   "  e6:	bf08       	it	eq\n",
-  "  e8:	4008      	andeq	r0, r1\n",
+  "  e8:	4188      	sbceq	r0, r1\n",
   "  ea:	bf08       	it	eq\n",
-  "  ec:	4308      	orreq	r0, r1\n",
+  "  ec:	4008      	andeq	r0, r1\n",
   "  ee:	bf08       	it	eq\n",
-  "  f0:	4048      	eoreq	r0, r1\n",
+  "  f0:	4308      	orreq	r0, r1\n",
   "  f2:	bf08       	it	eq\n",
-  "  f4:	4388      	biceq	r0, r1\n",
-  "  f6:	4608      	mov	r0, r1\n",
-  "  f8:	43c8      	mvns	r0, r1\n",
-  "  fa:	4408      	add	r0, r1\n",
-  "  fc:	1888      	adds	r0, r1, r2\n",
-  "  fe:	1a88      	subs	r0, r1, r2\n",
-  " 100:	4148      	adcs	r0, r1\n",
-  " 102:	4188      	sbcs	r0, r1\n",
-  " 104:	4008      	ands	r0, r1\n",
-  " 106:	4308      	orrs	r0, r1\n",
-  " 108:	4048      	eors	r0, r1\n",
-  " 10a:	4388      	bics	r0, r1\n",
-  " 10c:	4641		mov	r1, r8\n",
-  " 10e:	4681		mov	r9, r0\n",
-  " 110:	46c8		mov	r8, r9\n",
-  " 112:	4441		add	r1, r8\n",
-  " 114:	4481		add	r9, r0\n",
-  " 116:	44c8		add	r8, r9\n",
-  " 118:	4248   	   	negs	r0, r1\n",
-  " 11a:	4240   	   	negs	r0, r0\n",
-  " 11c:	eb01 0c00 	add.w	ip, r1, r0\n",
+  "  f4:	4048      	eoreq	r0, r1\n",
+  "  f6:	bf08       	it	eq\n",
+  "  f8:	4388      	biceq	r0, r1\n",
+  "  fa:	4608      	mov	r0, r1\n",
+  "  fc:	43c8      	mvns	r0, r1\n",
+  "  fe:	4408      	add	r0, r1\n",
+  " 100:	1888      	adds	r0, r1, r2\n",
+  " 102:	1a88      	subs	r0, r1, r2\n",
+  " 104:	4148      	adcs	r0, r1\n",
+  " 106:	4188      	sbcs	r0, r1\n",
+  " 108:	4008      	ands	r0, r1\n",
+  " 10a:	4308      	orrs	r0, r1\n",
+  " 10c:	4048      	eors	r0, r1\n",
+  " 10e:	4388      	bics	r0, r1\n",
+  " 110:	4641		mov	r1, r8\n",
+  " 112:	4681		mov	r9, r0\n",
+  " 114:	46c8		mov	r8, r9\n",
+  " 116:	4441		add	r1, r8\n",
+  " 118:	4481		add	r9, r0\n",
+  " 11a:	44c8		add	r8, r9\n",
+  " 11c:	4248   	   	negs	r0, r1\n",
+  " 11e:	4240   	   	negs	r0, r0\n",
+  " 120:	eb01 0c00 	add.w	ip, r1, r0\n",
   nullptr
 };
 const char* DataProcessingImmediateResults[] = {
@@ -135,21 +136,22 @@
   "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
-  "  16:	f081 0055 	eor.w	r0, r1, #85	; 0x55\n",
-  "  1a:	f021 0055 	bic.w	r0, r1, #85	; 0x55\n",
-  "  1e:	f141 0055 	adc.w	r0, r1, #85	; 0x55\n",
-  "  22:	f161 0055 	sbc.w	r0, r1, #85	; 0x55\n",
-  "  26:	f1c1 0055 	rsb	r0, r1, #85	; 0x55\n",
-  "  2a:	f010 0f55 	tst.w	r0, #85	; 0x55\n",
-  "  2e:	f090 0f55 	teq	r0, #85	; 0x55\n",
-  "  32:	2855      	cmp	r0, #85	; 0x55\n",
-  "  34:	f110 0f55 	cmn.w	r0, #85	; 0x55\n",
-  "  38:	1d48      	adds	r0, r1, #5\n",
-  "  3a:	1f48      	subs	r0, r1, #5\n",
-  "  3c:	2055      	movs	r0, #85	; 0x55\n",
-  "  3e:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
-  "  42:	1d48      	adds  r0, r1, #5\n",
-  "  44:	1f48      	subs  r0, r1, #5\n",
+  "  16:	f061 0055 	orn	r0, r1, #85	; 0x55\n",
+  "  1a:	f081 0055 	eor.w	r0, r1, #85	; 0x55\n",
+  "  1e:	f021 0055 	bic.w	r0, r1, #85	; 0x55\n",
+  "  22:	f141 0055 	adc.w	r0, r1, #85	; 0x55\n",
+  "  26:	f161 0055 	sbc.w	r0, r1, #85	; 0x55\n",
+  "  2a:	f1c1 0055 	rsb	r0, r1, #85	; 0x55\n",
+  "  2e:	f010 0f55 	tst.w	r0, #85	; 0x55\n",
+  "  32:	f090 0f55 	teq	r0, #85	; 0x55\n",
+  "  36:	2855      	cmp	r0, #85	; 0x55\n",
+  "  38:	f110 0f55 	cmn.w	r0, #85	; 0x55\n",
+  "  3c:	1d48      	adds	r0, r1, #5\n",
+  "  3e:	1f48      	subs	r0, r1, #5\n",
+  "  40:	2055      	movs	r0, #85	; 0x55\n",
+  "  42:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
+  "  46:	1d48      	adds	r0, r1, #5\n",
+  "  48:	1f48      	subs	r0, r1, #5\n",
   nullptr
 };
 const char* DataProcessingModifiedImmediateResults[] = {
@@ -159,15 +161,16 @@
   "   c:	f1a1 1055 	sub.w	r0, r1, #5570645	; 0x550055\n",
   "  10:	f001 1055 	and.w	r0, r1, #5570645	; 0x550055\n",
   "  14:	f041 1055 	orr.w	r0, r1, #5570645	; 0x550055\n",
-  "  18:	f081 1055 	eor.w	r0, r1, #5570645	; 0x550055\n",
-  "  1c:	f021 1055 	bic.w	r0, r1, #5570645	; 0x550055\n",
-  "  20:	f141 1055 	adc.w	r0, r1, #5570645	; 0x550055\n",
-  "  24:	f161 1055 	sbc.w	r0, r1, #5570645	; 0x550055\n",
-  "  28:	f1c1 1055 	rsb	r0, r1, #5570645	; 0x550055\n",
-  "  2c:	f010 1f55 	tst.w	r0, #5570645	; 0x550055\n",
-  "  30:	f090 1f55 	teq	r0, #5570645	; 0x550055\n",
-  "  34:	f1b0 1f55 	cmp.w	r0, #5570645	; 0x550055\n",
-  "  38:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
+  "  18:	f061 1055 	orn	r0, r1, #5570645	; 0x550055\n",
+  "  1c:	f081 1055 	eor.w	r0, r1, #5570645	; 0x550055\n",
+  "  20:	f021 1055 	bic.w	r0, r1, #5570645	; 0x550055\n",
+  "  24:	f141 1055 	adc.w	r0, r1, #5570645	; 0x550055\n",
+  "  28:	f161 1055 	sbc.w	r0, r1, #5570645	; 0x550055\n",
+  "  2c:	f1c1 1055 	rsb	r0, r1, #5570645	; 0x550055\n",
+  "  30:	f010 1f55 	tst.w	r0, #5570645	; 0x550055\n",
+  "  34:	f090 1f55 	teq	r0, #5570645	; 0x550055\n",
+  "  38:	f1b0 1f55 	cmp.w	r0, #5570645	; 0x550055\n",
+  "  3c:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
   nullptr
 };
 const char* DataProcessingModifiedImmediatesResults[] = {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 04e815a..5347bf0 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -2369,44 +2369,48 @@
   }
 }
 
-int ConstantArea::AddInt32(int32_t v) {
-  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
-    if (v == buffer_[i]) {
-      return i * kEntrySize;
-    }
-  }
-
-  // Didn't match anything.
-  int result = buffer_.size() * kEntrySize;
+size_t ConstantArea::AppendInt32(int32_t v) {
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v);
   return result;
 }
 
-int ConstantArea::AddInt64(int64_t v) {
+size_t ConstantArea::AddInt32(int32_t v) {
+  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
+    if (v == buffer_[i]) {
+      return i * elem_size_;
+    }
+  }
+
+  // Didn't match anything.
+  return AppendInt32(v);
+}
+
+size_t ConstantArea::AddInt64(int64_t v) {
   int32_t v_low = Low32Bits(v);
   int32_t v_high = High32Bits(v);
   if (buffer_.size() > 1) {
     // Ensure we don't pass the end of the buffer.
     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
-        return i * kEntrySize;
+        return i * elem_size_;
       }
     }
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * kEntrySize;
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v_low);
   buffer_.push_back(v_high);
   return result;
 }
 
-int ConstantArea::AddDouble(double v) {
+size_t ConstantArea::AddDouble(double v) {
   // Treat the value as a 64-bit integer value.
   return AddInt64(bit_cast<int64_t, double>(v));
 }
 
-int ConstantArea::AddFloat(float v) {
+size_t ConstantArea::AddFloat(float v) {
   // Treat the value as a 32-bit integer value.
   return AddInt32(bit_cast<int32_t, float>(v));
 }
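Note: AddInt32() deduplicates against existing constant-area entries, while the new AppendInt32() unconditionally appends. The distinction matters for data that must stay contiguous, such as a jump table, where reusing an earlier slot would corrupt the table layout. A hedged sketch; EmitJumpTable is a hypothetical helper, not part of this patch:

    // Illustrative fragment, not part of this patch.
    size_t EmitJumpTable(X86Assembler* assembler, const std::vector<int32_t>& offsets) {
      size_t table_start = assembler->ConstantAreaSize();   // offset of the first slot
      for (int32_t offset : offsets) {
        assembler->AppendInt32(offset);  // always append; never reuse an existing entry
      }
      return table_start;
    }
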
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 93ecdf5..b50fda9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -166,21 +166,6 @@
     Init(base_in, disp.Int32Value());
   }
 
-  void Init(Register base_in, int32_t disp) {
-    if (disp == 0 && base_in != EBP) {
-      SetModRM(0, base_in);
-      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
-    } else if (disp >= -128 && disp <= 127) {
-      SetModRM(1, base_in);
-      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
-      SetDisp8(disp);
-    } else {
-      SetModRM(2, base_in);
-      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
-      SetDisp32(disp);
-    }
-  }
-
   Address(Register index_in, ScaleFactor scale_in, int32_t disp) {
     CHECK_NE(index_in, ESP);  // Illegal addressing mode.
     SetModRM(0, ESP);
@@ -189,19 +174,15 @@
   }
 
   Address(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
-    CHECK_NE(index_in, ESP);  // Illegal addressing mode.
-    if (disp == 0 && base_in != EBP) {
-      SetModRM(0, ESP);
-      SetSIB(scale_in, index_in, base_in);
-    } else if (disp >= -128 && disp <= 127) {
-      SetModRM(1, ESP);
-      SetSIB(scale_in, index_in, base_in);
-      SetDisp8(disp);
-    } else {
-      SetModRM(2, ESP);
-      SetSIB(scale_in, index_in, base_in);
-      SetDisp32(disp);
-    }
+    Init(base_in, index_in, scale_in, disp);
+  }
+
+  Address(Register base_in,
+          Register index_in,
+          ScaleFactor scale_in,
+          int32_t disp, AssemblerFixup* fixup) {
+    Init(base_in, index_in, scale_in, disp);
+    SetFixup(fixup);
   }
 
   static Address Absolute(uintptr_t addr) {
@@ -217,6 +198,37 @@
 
  private:
   Address() {}
+
+  void Init(Register base_in, int32_t disp) {
+    if (disp == 0 && base_in != EBP) {
+      SetModRM(0, base_in);
+      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
+    } else if (disp >= -128 && disp <= 127) {
+      SetModRM(1, base_in);
+      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
+      SetDisp8(disp);
+    } else {
+      SetModRM(2, base_in);
+      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
+      SetDisp32(disp);
+    }
+  }
+
+  void Init(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
+    CHECK_NE(index_in, ESP);  // Illegal addressing mode.
+    if (disp == 0 && base_in != EBP) {
+      SetModRM(0, ESP);
+      SetSIB(scale_in, index_in, base_in);
+    } else if (disp >= -128 && disp <= 127) {
+      SetModRM(1, ESP);
+      SetSIB(scale_in, index_in, base_in);
+      SetDisp8(disp);
+    } else {
+      SetModRM(2, ESP);
+      SetSIB(scale_in, index_in, base_in);
+      SetDisp32(disp);
+    }
+  }
 };
 
 
@@ -252,40 +264,39 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v);
+  size_t AddDouble(double v);
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v);
+  size_t AddFloat(float v);
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v);
+  size_t AddInt32(int32_t v);
+
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v);
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v);
+  size_t AddInt64(int64_t v);
 
   bool IsEmpty() const {
     return buffer_.size() == 0;
   }
 
+  size_t GetSize() const {
+    return buffer_.size() * elem_size_;
+  }
+
   const std::vector<int32_t>& GetBuffer() const {
     return buffer_;
   }
 
-  void AddFixup(AssemblerFixup* fixup) {
-    fixups_.push_back(fixup);
-  }
-
-  const std::vector<AssemblerFixup*>& GetFixups() const {
-    return fixups_;
-  }
-
  private:
-  static constexpr size_t kEntrySize = sizeof(int32_t);
+  static constexpr size_t elem_size_ = sizeof(int32_t);
   std::vector<int32_t> buffer_;
-  std::vector<AssemblerFixup*> fixups_;
 };
 
 class X86Assembler FINAL : public Assembler {
@@ -740,26 +751,36 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+  size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+  size_t AddInt32(int32_t v) {
+    return constant_area_.AddInt32(v);
+  }
+
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v) {
+    return constant_area_.AppendInt32(v);
+  }
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
 
   // Add the contents of the constant area to the assembler buffer.
   void AddConstantArea();
 
   // Is the constant area empty? Return true if there are no literals in the constant area.
   bool IsConstantAreaEmpty() const { return constant_area_.IsEmpty(); }
-  void AddConstantAreaFixup(AssemblerFixup* fixup) { constant_area_.AddFixup(fixup); }
+
+  // Return the current size of the constant area.
+  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
 
  private:
   inline void EmitUint8(uint8_t value);
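Note: the constant-area fixup list (AddConstantAreaFixup/GetFixups) is removed; a fixup is now attached directly to the Address that references the constant area via the new constructor, and ConstantAreaSize() exposes the current size so callers can compute offsets. A hedged sketch of the intended usage; the register choice and the fixup variable are assumptions:

    // Illustrative fragment, not part of this patch.
    AssemblerFixup* fixup = ...;  // patches the displacement once the constant
                                  // area's final location is known
    __ movl(EAX, Address(base_reg, index_reg, TIMES_4, offset_in_area, fixup));
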
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 6e7d74d..9eb5e67 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -3122,7 +3122,14 @@
   }
 }
 
-int ConstantArea::AddInt32(int32_t v) {
+size_t ConstantArea::AppendInt32(int32_t v) {
+  size_t result = buffer_.size() * elem_size_;
+  buffer_.push_back(v);
+  return result;
+}
+
+size_t ConstantArea::AddInt32(int32_t v) {
+  // Look for an existing match.
   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
     if (v == buffer_[i]) {
       return i * elem_size_;
@@ -3130,12 +3137,10 @@
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * elem_size_;
-  buffer_.push_back(v);
-  return result;
+  return AppendInt32(v);
 }
 
-int ConstantArea::AddInt64(int64_t v) {
+size_t ConstantArea::AddInt64(int64_t v) {
   int32_t v_low = v;
   int32_t v_high = v >> 32;
   if (buffer_.size() > 1) {
@@ -3148,18 +3153,18 @@
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * elem_size_;
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v_low);
   buffer_.push_back(v_high);
   return result;
 }
 
-int ConstantArea::AddDouble(double v) {
+size_t ConstantArea::AddDouble(double v) {
   // Treat the value as a 64-bit integer value.
   return AddInt64(bit_cast<int64_t, double>(v));
 }
 
-int ConstantArea::AddFloat(float v) {
+size_t ConstantArea::AddFloat(float v) {
   // Treat the value as a 32-bit integer value.
   return AddInt32(bit_cast<int32_t, float>(v));
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 255f551..01d28e3 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -269,36 +269,40 @@
  * Class to handle constant area values.
  */
 class ConstantArea {
-  public:
-    ConstantArea() {}
+ public:
+  ConstantArea() {}
 
-    // Add a double to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddDouble(double v);
+  // Add a double to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddDouble(double v);
 
-    // Add a float to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddFloat(float v);
+  // Add a float to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddFloat(float v);
 
-    // Add an int32_t to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddInt32(int32_t v);
+  // Add an int32_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddInt32(int32_t v);
 
-    // Add an int64_t to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddInt64(int64_t v);
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v);
 
-    int GetSize() const {
-      return buffer_.size() * elem_size_;
-    }
+  // Add an int64_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddInt64(int64_t v);
 
-    const std::vector<int32_t>& GetBuffer() const {
-      return buffer_;
-    }
+  size_t GetSize() const {
+    return buffer_.size() * elem_size_;
+  }
 
-  private:
-    static constexpr size_t elem_size_ = sizeof(int32_t);
-    std::vector<int32_t> buffer_;
+  const std::vector<int32_t>& GetBuffer() const {
+    return buffer_;
+  }
+
+ private:
+  static constexpr size_t elem_size_ = sizeof(int32_t);
+  std::vector<int32_t> buffer_;
 };
 
 
@@ -806,19 +810,27 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+  size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+  size_t AddInt32(int32_t v) {
+    return constant_area_.AddInt32(v);
+  }
+
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v) {
+    return constant_area_.AppendInt32(v);
+  }
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
 
   // Add the contents of the constant area to the assembler buffer.
   void AddConstantArea();
@@ -826,6 +838,9 @@
   // Is the constant area empty? Return true if there are no literals in the constant area.
   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
 
+  // Return the current size of the constant area.
+  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
+
   //
   // Heap poisoning.
   //
diff --git a/test/538-checker-embed-constants/expected.txt b/test/538-checker-embed-constants/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/538-checker-embed-constants/expected.txt
diff --git a/test/538-checker-embed-constants/info.txt b/test/538-checker-embed-constants/info.txt
new file mode 100644
index 0000000..5a722ec
--- /dev/null
+++ b/test/538-checker-embed-constants/info.txt
@@ -0,0 +1 @@
+Test embedding of constants in assembler instructions.
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
new file mode 100644
index 0000000..d8618e3
--- /dev/null
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-ARM: int Main.and255(int) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK:                and {{r\d+}}, {{r\d+}}, #255
+
+  public static int and255(int arg) {
+    return arg & 255;
+  }
+
+  /// CHECK-START-ARM: int Main.and511(int) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int and511(int arg) {
+    return arg & 511;
+  }
+
+  /// CHECK-START-ARM: int Main.andNot15(int) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #15
+
+  public static int andNot15(int arg) {
+    return arg & ~15;
+  }
+
+  /// CHECK-START-ARM: int Main.or255(int) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #255
+
+  public static int or255(int arg) {
+    return arg | 255;
+  }
+
+  /// CHECK-START-ARM: int Main.or511(int) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK:                orr{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int or511(int arg) {
+    return arg | 511;
+  }
+
+  /// CHECK-START-ARM: int Main.orNot15(int) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK:                orn {{r\d+}}, {{r\d+}}, #15
+
+  public static int orNot15(int arg) {
+    return arg | ~15;
+  }
+
+  /// CHECK-START-ARM: int Main.xor255(int) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #255
+
+  public static int xor255(int arg) {
+    return arg ^ 255;
+  }
+
+  /// CHECK-START-ARM: int Main.xor511(int) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK:                eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int xor511(int arg) {
+    return arg ^ 511;
+  }
+
+  /// CHECK-START-ARM: int Main.xorNot15(int) disassembly (after)
+  /// CHECK:                mvn {{r\d+}}, #15
+  /// CHECK:                eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int xorNot15(int arg) {
+    return arg ^ ~15;
+  }
+
+  /// CHECK-START-ARM: long Main.and255(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #255
+  /// CHECK-DAG:            movs {{r\d+}}, #0
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long and255(long arg) {
+    return arg & 255L;
+  }
+
+  /// CHECK-START-ARM: long Main.and511(long) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK-DAG:            and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            movs {{r\d+}}, #0
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long and511(long arg) {
+    return arg & 511L;
+  }
+
+  /// CHECK-START-ARM: long Main.andNot15(long) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long andNot15(long arg) {
+    return arg & ~15L;
+  }
+
+  /// CHECK-START-ARM: long Main.and0xfffffff00000000f(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #15
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long and0xfffffff00000000f(long arg) {
+    return arg & 0xfffffff00000000fL;
+  }
+
+  /// CHECK-START-ARM: long Main.or255(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #255
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long or255(long arg) {
+    return arg | 255L;
+  }
+
+  /// CHECK-START-ARM: long Main.or511(long) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK:                orr{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long or511(long arg) {
+    return arg | 511L;
+  }
+
+  /// CHECK-START-ARM: long Main.orNot15(long) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            mvn {{r\d+}}, #0
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long orNot15(long arg) {
+    return arg | ~15L;
+  }
+
+  /// CHECK-START-ARM: long Main.or0xfffffff00000000f(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #15
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK-DAG:            orr {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long or0xfffffff00000000f(long arg) {
+    return arg | 0xfffffff00000000fL;
+  }
+
+  /// CHECK-START-ARM: long Main.xor255(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK-NOT:            eor
+  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #255
+  /// CHECK-NOT:            eor
+
+  public static long xor255(long arg) {
+    return arg ^ 255L;
+  }
+
+  /// CHECK-START-ARM: long Main.xor511(long) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            eor
+
+  public static long xor511(long arg) {
+    return arg ^ 511L;
+  }
+
+  /// CHECK-START-ARM: long Main.xorNot15(long) disassembly (after)
+  /// CHECK-DAG:            mvn {{r\d+}}, #15
+  /// CHECK-DAG:            mov.w {{r\d+}}, #-1
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            eor
+
+  public static long xorNot15(long arg) {
+    return arg ^ ~15L;
+  }
+
+  // Note: No support for partial long constant embedding.
+  /// CHECK-START-ARM: long Main.xor0xfffffff00000000f(long) disassembly (after)
+  /// CHECK-DAG:            movs {{r\d+}}, #15
+  /// CHECK-DAG:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            eor
+
+  public static long xor0xfffffff00000000f(long arg) {
+    return arg ^ 0xfffffff00000000fL;
+  }
+
+  /// CHECK-START-ARM: long Main.xor0xf00000000000000f(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #15
+  /// CHECK-NOT:            mov.w {{r\d+}}, #-268435456
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #-268435456
+  /// CHECK-NOT:            eor
+
+  public static long xor0xf00000000000000f(long arg) {
+    return arg ^ 0xf00000000000000fL;
+  }
+
+  public static void main(String[] args) {
+    int arg = 0x87654321;
+    assertIntEquals(and255(arg), 0x21);
+    assertIntEquals(and511(arg), 0x121);
+    assertIntEquals(andNot15(arg), 0x87654320);
+    assertIntEquals(or255(arg), 0x876543ff);
+    assertIntEquals(or511(arg), 0x876543ff);
+    assertIntEquals(orNot15(arg), 0xfffffff1);
+    assertIntEquals(xor255(arg), 0x876543de);
+    assertIntEquals(xor511(arg), 0x876542de);
+    assertIntEquals(xorNot15(arg), 0x789abcd1);
+
+    long longArg = 0x1234567887654321L;
+    assertLongEquals(and255(longArg), 0x21L);
+    assertLongEquals(and511(longArg), 0x121L);
+    assertLongEquals(andNot15(longArg), 0x1234567887654320L);
+    assertLongEquals(and0xfffffff00000000f(longArg), 0x1234567000000001L);
+    assertLongEquals(or255(longArg), 0x12345678876543ffL);
+    assertLongEquals(or511(longArg), 0x12345678876543ffL);
+    assertLongEquals(orNot15(longArg), 0xfffffffffffffff1L);
+    assertLongEquals(or0xfffffff00000000f(longArg), 0xfffffff88765432fL);
+    assertLongEquals(xor255(longArg), 0x12345678876543deL);
+    assertLongEquals(xor511(longArg), 0x12345678876542deL);
+    assertLongEquals(xorNot15(longArg), 0xedcba987789abcd1L);
+    assertLongEquals(xor0xfffffff00000000f(longArg), 0xedcba9888765432eL);
+    assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
+  }
+}
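Note: the /// CHECK-START-ARM blocks are Checker assertions matched against the ARM disassembly of each method after the named pass; CHECK-NOT forbids a pattern, and CHECK-DAG lines may match in any order. For example, the orNot15(long) assertions above amount to the following shape (register choice is illustrative; only the mnemonics and immediates are asserted):

    orn r{lo}, r{lo}, #15    ; low word:  arg_lo | ~15
    mvn r{hi}, #0            ; high word: arg_hi | ~0  ==  0xffffffff
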
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 537873f..ad64b68 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -214,11 +214,22 @@
   055-enum-performance \
   133-static-invoke-super
 
- # disable timing sensitive tests on "dist" builds.
+# Tests that require python3.
+TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS := \
+  960-default-smali \
+  961-default-iface-resolution-generated \
+  964-default-iface-init-generated \
+
+# disable timing sensitive tests on "dist" builds.
 ifdef dist_goal
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
         $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+
+  # Currently disable tests requiring python3.
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_TIMING_SENSITIVE_RUN_TESTS :=