Merge "Revert "Unfortunately, the test still hits too many run failures.""
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 061ee07..ee523f3 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -425,7 +425,11 @@
       RegLocation loc = UpdateLoc(info->args[i]);
       if (loc.location == kLocPhysReg) {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        if (loc.ref) {
+          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+        } else {
+          Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        }
       }
     }
     /*
@@ -481,9 +485,17 @@
   } else if (!info->is_range) {
     // TUNING: interleave
     for (int i = 0; i < elems; i++) {
-      RegLocation rl_arg = LoadValue(info->args[i], kCoreReg);
-      Store32Disp(ref_reg,
-                  mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
+      RegLocation rl_arg;
+      if (info->args[i].ref) {
+        rl_arg = LoadValue(info->args[i], kRefReg);
+        StoreRefDisp(ref_reg,
+                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg,
+                    kNotVolatile);
+      } else {
+        rl_arg = LoadValue(info->args[i], kCoreReg);
+        Store32Disp(ref_reg,
+                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
+      }
       // If the LoadValue caused a temp to be allocated, free it
       if (IsTemp(rl_arg.reg)) {
         FreeTemp(rl_arg.reg);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index d3ac4e0..dddee2b 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -41,25 +41,29 @@
  */
 class Temporaries : public ValueObject {
  public:
-  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
-    graph_->UpdateNumberOfTemporaries(count_);
-  }
+  explicit Temporaries(HGraph* graph) : graph_(graph), index_(0) {}
 
   void Add(HInstruction* instruction) {
-    // We currently only support vreg size temps.
-    DCHECK(instruction->GetType() != Primitive::kPrimLong
-           && instruction->GetType() != Primitive::kPrimDouble);
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_);
     instruction->GetBlock()->AddInstruction(temp);
+
     DCHECK(temp->GetPrevious() == instruction);
+
+    size_t offset;
+    if (instruction->GetType() == Primitive::kPrimLong
+        || instruction->GetType() == Primitive::kPrimDouble) {
+      offset = 2;
+    } else {
+      offset = 1;
+    }
+    index_ += offset;
+
+    graph_->UpdateTemporariesVRegSlots(index_);
   }
 
  private:
   HGraph* const graph_;
 
-  // The total number of temporaries that will be used.
-  const size_t count_;
-
   // Current index in the temporary stack, updated by `Add`.
   size_t index_;
 };
@@ -115,37 +119,37 @@
 }
 
 template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_offset) {
+void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
   int32_t target_offset = instruction.GetTargetOffset();
-  PotentiallyAddSuspendCheck(target_offset, dex_offset);
+  PotentiallyAddSuspendCheck(target_offset, dex_pc);
   HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(first, second);
   current_block_->AddInstruction(comparison);
   HInstruction* ifinst = new (arena_) HIf(comparison);
   current_block_->AddInstruction(ifinst);
-  HBasicBlock* target = FindBlockStartingAt(dex_offset + target_offset);
+  HBasicBlock* target = FindBlockStartingAt(dex_pc + target_offset);
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
-  target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+  target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
   current_block_ = nullptr;
 }
 
 template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_offset) {
+void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
   int32_t target_offset = instruction.GetTargetOffset();
-  PotentiallyAddSuspendCheck(target_offset, dex_offset);
+  PotentiallyAddSuspendCheck(target_offset, dex_pc);
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(value, GetIntConstant(0));
   current_block_->AddInstruction(comparison);
   HInstruction* ifinst = new (arena_) HIf(comparison);
   current_block_->AddInstruction(ifinst);
-  HBasicBlock* target = FindBlockStartingAt(dex_offset + target_offset);
+  HBasicBlock* target = FindBlockStartingAt(dex_pc + target_offset);
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
-  target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+  target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
   current_block_ = nullptr;
@@ -192,13 +196,13 @@
 
   InitializeParameters(code_item.ins_size_);
 
-  size_t dex_offset = 0;
+  size_t dex_pc = 0;
   while (code_ptr < code_end) {
-    // Update the current block if dex_offset starts a new block.
-    MaybeUpdateCurrentBlock(dex_offset);
+    // Update the current block if dex_pc starts a new block.
+    MaybeUpdateCurrentBlock(dex_pc);
     const Instruction& instruction = *Instruction::At(code_ptr);
-    if (!AnalyzeDexInstruction(instruction, dex_offset)) return nullptr;
-    dex_offset += instruction.SizeInCodeUnits();
+    if (!AnalyzeDexInstruction(instruction, dex_pc)) return nullptr;
+    dex_pc += instruction.SizeInCodeUnits();
     code_ptr += instruction.SizeInCodeUnits();
   }
 
@@ -239,25 +243,25 @@
 
   // Iterate over all instructions and find branching instructions. Create blocks for
   // the locations these instructions branch to.
-  size_t dex_offset = 0;
+  size_t dex_pc = 0;
   while (code_ptr < code_end) {
     const Instruction& instruction = *Instruction::At(code_ptr);
     if (instruction.IsBranch()) {
-      int32_t target = instruction.GetTargetOffset() + dex_offset;
+      int32_t target = instruction.GetTargetOffset() + dex_pc;
       // Create a block for the target instruction.
       if (FindBlockStartingAt(target) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, target);
         branch_targets_.Put(target, block);
       }
-      dex_offset += instruction.SizeInCodeUnits();
+      dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
-      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_offset) == nullptr)) {
-        block = new (arena_) HBasicBlock(graph_, dex_offset);
-        branch_targets_.Put(dex_offset, block);
+      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+        block = new (arena_) HBasicBlock(graph_, dex_pc);
+        branch_targets_.Put(dex_pc, block);
       }
     } else {
       code_ptr += instruction.SizeInCodeUnits();
-      dex_offset += instruction.SizeInCodeUnits();
+      dex_pc += instruction.SizeInCodeUnits();
     }
   }
 }
@@ -291,6 +295,16 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_23x(const Instruction& instruction,
+                              Primitive::Type type,
+                              uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
   HInstruction* second = LoadLocal(instruction.VRegB(), type);
@@ -299,6 +313,16 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_12x(const Instruction& instruction,
+                              Primitive::Type type,
+                              uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse) {
   HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   HInstruction* second = GetIntConstant(instruction.VRegC_22s());
@@ -332,7 +356,7 @@
 }
 
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
-                                uint32_t dex_offset,
+                                uint32_t dex_pc,
                                 uint32_t method_idx,
                                 uint32_t number_of_vreg_arguments,
                                 bool is_range,
@@ -380,7 +404,7 @@
     uintptr_t direct_method;
     int table_index;
     InvokeType optimized_invoke_type = invoke_type;
-    compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_offset, true, true,
+    compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
                                         &optimized_invoke_type, &target_method, &table_index,
                                         &direct_code, &direct_method);
     if (table_index == -1) {
@@ -389,29 +413,29 @@
 
     if (optimized_invoke_type == kVirtual) {
       invoke = new (arena_) HInvokeVirtual(
-          arena_, number_of_arguments, return_type, dex_offset, table_index);
+          arena_, number_of_arguments, return_type, dex_pc, table_index);
     } else if (optimized_invoke_type == kInterface) {
       invoke = new (arena_) HInvokeInterface(
-          arena_, number_of_arguments, return_type, dex_offset, method_idx, table_index);
+          arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
     } else if (optimized_invoke_type == kDirect) {
       // For this compiler, sharpening only works if we compile PIC.
       DCHECK(compiler_driver_->GetCompilerOptions().GetCompilePic());
       // Treat invoke-direct like static calls for now.
       invoke = new (arena_) HInvokeStatic(
-          arena_, number_of_arguments, return_type, dex_offset, target_method.dex_method_index);
+          arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index);
     }
   } else {
     DCHECK(invoke_type == kDirect || invoke_type == kStatic);
     // Treat invoke-direct like static calls for now.
     invoke = new (arena_) HInvokeStatic(
-        arena_, number_of_arguments, return_type, dex_offset, method_idx);
+        arena_, number_of_arguments, return_type, dex_pc, method_idx);
   }
 
   size_t start_index = 0;
-  Temporaries temps(graph_, is_instance_call ? 1 : 0);
+  Temporaries temps(graph_);
   if (is_instance_call) {
     HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_offset);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_pc);
     current_block_->AddInstruction(null_check);
     temps.Add(null_check);
     invoke->SetArgumentAt(0, null_check);
@@ -425,7 +449,7 @@
     bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
     if (!is_range && is_wide && args[i] + 1 != args[i + 1]) {
       LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
-                   << " at " << dex_offset;
+                   << " at " << dex_pc;
       // We do not implement non sequential register pair.
       return false;
     }
@@ -443,7 +467,7 @@
 }
 
 bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
-                                             uint32_t dex_offset,
+                                             uint32_t dex_pc,
                                              bool is_put) {
   uint32_t source_or_dest_reg = instruction.VRegA_22c();
   uint32_t obj_reg = instruction.VRegB_22c();
@@ -464,9 +488,9 @@
   Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
 
   HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
-  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_offset));
+  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_pc));
   if (is_put) {
-    Temporaries temps(graph_, 1);
+    Temporaries temps(graph_);
     HInstruction* null_check = current_block_->GetLastInstruction();
     // We need one temporary for the null check.
     temps.Add(null_check);
@@ -490,7 +514,7 @@
 
 
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
-                                           uint32_t dex_offset,
+                                           uint32_t dex_pc,
                                            bool is_put) {
   uint32_t source_or_dest_reg = instruction.VRegA_21c();
   uint16_t field_index = instruction.VRegB_21c();
@@ -520,18 +544,18 @@
   }
 
   HLoadClass* constant = new (arena_) HLoadClass(
-      storage_index, is_referrers_class, dex_offset);
+      storage_index, is_referrers_class, dex_pc);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
   if (!is_initialized) {
-    cls = new (arena_) HClinitCheck(constant, dex_offset);
+    cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
 
   if (is_put) {
     // We need to keep the class alive before loading the value.
-    Temporaries temps(graph_, 1);
+    Temporaries temps(graph_);
     temps.Add(cls);
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
     DCHECK_EQ(value->GetType(), field_type);
@@ -544,29 +568,41 @@
   return true;
 }
 
-void HGraphBuilder::BuildCheckedDiv(uint16_t out_reg,
-                                    uint16_t first_reg,
-                                    int32_t second_reg,
-                                    uint32_t dex_offset,
+void HGraphBuilder::BuildCheckedDiv(uint16_t out_vreg,
+                                    uint16_t first_vreg,
+                                    int64_t second_vreg_or_constant,
+                                    uint32_t dex_pc,
                                     Primitive::Type type,
-                                    bool second_is_lit) {
-  DCHECK(type == Primitive::kPrimInt);
+                                    bool second_is_constant) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
-  HInstruction* first = LoadLocal(first_reg, type);
-  HInstruction* second = second_is_lit ? GetIntConstant(second_reg) : LoadLocal(second_reg, type);
-  if (!second->IsIntConstant() || (second->AsIntConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_offset);
-    Temporaries temps(graph_, 1);
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = GetIntConstant(second_vreg_or_constant);
+    } else {
+      second = GetLongConstant(second_vreg_or_constant);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
+    Temporaries temps(graph_);
     current_block_->AddInstruction(second);
     temps.Add(current_block_->GetLastInstruction());
   }
 
-  current_block_->AddInstruction(new (arena_) HDiv(type, first, second));
-  UpdateLocal(out_reg, current_block_->GetLastInstruction());
+  current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
 }
 
 void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
-                                     uint32_t dex_offset,
+                                     uint32_t dex_pc,
                                      bool is_put,
                                      Primitive::Type anticipated_type) {
   uint8_t source_or_dest_reg = instruction.VRegA_23x();
@@ -574,10 +610,10 @@
   uint8_t index_reg = instruction.VRegC_23x();
 
   // We need one temporary for the null check, one for the index, and one for the length.
-  Temporaries temps(graph_, 3);
+  Temporaries temps(graph_);
 
   HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot);
-  object = new (arena_) HNullCheck(object, dex_offset);
+  object = new (arena_) HNullCheck(object, dex_pc);
   current_block_->AddInstruction(object);
   temps.Add(object);
 
@@ -585,28 +621,28 @@
   current_block_->AddInstruction(length);
   temps.Add(length);
   HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
-  index = new (arena_) HBoundsCheck(index, length, dex_offset);
+  index = new (arena_) HBoundsCheck(index, length, dex_pc);
   current_block_->AddInstruction(index);
   temps.Add(index);
   if (is_put) {
     HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type);
     // TODO: Insert a type check node if the type is Object.
     current_block_->AddInstruction(new (arena_) HArraySet(
-        object, index, value, anticipated_type, dex_offset));
+        object, index, value, anticipated_type, dex_pc));
   } else {
     current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type));
     UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
   }
 }
 
-void HGraphBuilder::BuildFilledNewArray(uint32_t dex_offset,
+void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
                                         uint32_t type_index,
                                         uint32_t number_of_vreg_arguments,
                                         bool is_range,
                                         uint32_t* args,
                                         uint32_t register_index) {
   HInstruction* length = GetIntConstant(number_of_vreg_arguments);
-  HInstruction* object = new (arena_) HNewArray(length, dex_offset, type_index);
+  HInstruction* object = new (arena_) HNewArray(length, dex_pc, type_index);
   current_block_->AddInstruction(object);
 
   const char* descriptor = dex_file_->StringByTypeIdx(type_index);
@@ -618,13 +654,13 @@
   bool is_reference_array = (primitive == 'L') || (primitive == '[');
   Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
 
-  Temporaries temps(graph_, 1);
+  Temporaries temps(graph_);
   temps.Add(object);
   for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
     HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
     HInstruction* index = GetIntConstant(i);
     current_block_->AddInstruction(
-        new (arena_) HArraySet(object, index, value, type, dex_offset));
+        new (arena_) HArraySet(object, index, value, type, dex_pc));
   }
   latest_result_ = object;
 }
@@ -634,26 +670,26 @@
                                        const T* data,
                                        uint32_t element_count,
                                        Primitive::Type anticipated_type,
-                                       uint32_t dex_offset) {
+                                       uint32_t dex_pc) {
   for (uint32_t i = 0; i < element_count; ++i) {
     HInstruction* index = GetIntConstant(i);
     HInstruction* value = GetIntConstant(data[i]);
     current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, anticipated_type, dex_offset));
+      object, index, value, anticipated_type, dex_pc));
   }
 }
 
-void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_offset) {
-  Temporaries temps(graph_, 1);
+void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
+  Temporaries temps(graph_);
   HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_offset);
+  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
   current_block_->AddInstruction(null_check);
   temps.Add(null_check);
 
   HInstruction* length = new (arena_) HArrayLength(null_check);
   current_block_->AddInstruction(length);
 
-  int32_t payload_offset = instruction.VRegB_31t() + dex_offset;
+  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
   const Instruction::ArrayDataPayload* payload =
       reinterpret_cast<const Instruction::ArrayDataPayload*>(code_start_ + payload_offset);
   const uint8_t* data = payload->data;
@@ -662,7 +698,7 @@
   // Implementation of this DEX instruction seems to be that the bounds check is
   // done before doing any stores.
   HInstruction* last_index = GetIntConstant(payload->element_count - 1);
-  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_offset));
+  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
 
   switch (payload->element_width) {
     case 1:
@@ -670,27 +706,27 @@
                          reinterpret_cast<const int8_t*>(data),
                          element_count,
                          Primitive::kPrimByte,
-                         dex_offset);
+                         dex_pc);
       break;
     case 2:
       BuildFillArrayData(null_check,
                          reinterpret_cast<const int16_t*>(data),
                          element_count,
                          Primitive::kPrimShort,
-                         dex_offset);
+                         dex_pc);
       break;
     case 4:
       BuildFillArrayData(null_check,
                          reinterpret_cast<const int32_t*>(data),
                          element_count,
                          Primitive::kPrimInt,
-                         dex_offset);
+                         dex_pc);
       break;
     case 8:
       BuildFillWideArrayData(null_check,
                              reinterpret_cast<const int64_t*>(data),
                              element_count,
-                             dex_offset);
+                             dex_pc);
       break;
     default:
       LOG(FATAL) << "Unknown element width for " << payload->element_width;
@@ -700,12 +736,12 @@
 void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
                                            const int64_t* data,
                                            uint32_t element_count,
-                                           uint32_t dex_offset) {
+                                           uint32_t dex_pc) {
   for (uint32_t i = 0; i < element_count; ++i) {
     HInstruction* index = GetIntConstant(i);
     HInstruction* value = GetLongConstant(data[i]);
     current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, Primitive::kPrimLong, dex_offset));
+      object, index, value, Primitive::kPrimLong, dex_pc));
   }
 }
 
@@ -713,7 +749,7 @@
                                    uint8_t destination,
                                    uint8_t reference,
                                    uint16_t type_index,
-                                   uint32_t dex_offset) {
+                                   uint32_t dex_pc) {
   bool type_known_final;
   bool type_known_abstract;
   bool is_referrers_class;
@@ -724,32 +760,32 @@
     return false;
   }
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
-  HLoadClass* cls = new (arena_) HLoadClass(type_index, is_referrers_class, dex_offset);
+  HLoadClass* cls = new (arena_) HLoadClass(type_index, is_referrers_class, dex_pc);
   current_block_->AddInstruction(cls);
   // The class needs a temporary before being used by the type check.
-  Temporaries temps(graph_, 1);
+  Temporaries temps(graph_);
   temps.Add(cls);
   if (instruction.Opcode() == Instruction::INSTANCE_OF) {
     current_block_->AddInstruction(
-        new (arena_) HInstanceOf(object, cls, type_known_final, dex_offset));
+        new (arena_) HInstanceOf(object, cls, type_known_final, dex_pc));
     UpdateLocal(destination, current_block_->GetLastInstruction());
   } else {
     DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
     current_block_->AddInstruction(
-        new (arena_) HCheckCast(object, cls, type_known_final, dex_offset));
+        new (arena_) HCheckCast(object, cls, type_known_final, dex_pc));
   }
   return true;
 }
 
-void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset) {
+void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc) {
   if (target_offset <= 0) {
-    // Unconditionnally add a suspend check to backward branches. We can remove
+    // Unconditionally add a suspend check to backward branches. We can remove
     // them after we recognize loops in the graph.
-    current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_offset));
+    current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_pc));
   }
 }
 
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_offset) {
+bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
   if (current_block_ == nullptr) {
     return true;  // Dead code
   }
@@ -852,8 +888,8 @@
     }
 
 #define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_offset); break; \
-    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_offset); break
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
 
     IF_XX(HEqual, EQ);
     IF_XX(HNotEqual, NE);
@@ -866,8 +902,8 @@
     case Instruction::GOTO_16:
     case Instruction::GOTO_32: {
       int32_t offset = instruction.GetTargetOffset();
-      PotentiallyAddSuspendCheck(offset, dex_offset);
-      HBasicBlock* target = FindBlockStartingAt(offset + dex_offset);
+      PotentiallyAddSuspendCheck(offset, dex_pc);
+      HBasicBlock* target = FindBlockStartingAt(offset + dex_pc);
       DCHECK(target != nullptr);
       current_block_->AddInstruction(new (arena_) HGoto());
       current_block_->AddSuccessor(target);
@@ -904,7 +940,7 @@
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
       instruction.GetVarArgs(args);
-      if (!BuildInvoke(instruction, dex_offset, method_idx,
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
                        number_of_vreg_arguments, false, args, -1)) {
         return false;
       }
@@ -919,7 +955,7 @@
       uint32_t method_idx = instruction.VRegB_3rc();
       uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
       uint32_t register_index = instruction.VRegC();
-      if (!BuildInvoke(instruction, dex_offset, method_idx,
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
                        number_of_vreg_arguments, true, nullptr, register_index)) {
         return false;
       }
@@ -1033,17 +1069,23 @@
 
     case Instruction::DIV_INT: {
       BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_offset, Primitive::kPrimInt, false);
+                      dex_pc, Primitive::kPrimInt, false);
+      break;
+    }
+
+    case Instruction::DIV_LONG: {
+      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                      dex_pc, Primitive::kPrimLong, false);
       break;
     }
 
     case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat);
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble);
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
@@ -1134,17 +1176,23 @@
 
     case Instruction::DIV_INT_2ADDR: {
       BuildCheckedDiv(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                      dex_offset, Primitive::kPrimInt, false);
+                      dex_pc, Primitive::kPrimInt, false);
+      break;
+    }
+
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDiv(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                      dex_pc, Primitive::kPrimLong, false);
       break;
     }
 
     case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat);
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble);
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
@@ -1241,13 +1289,13 @@
     case Instruction::DIV_INT_LIT16:
     case Instruction::DIV_INT_LIT8: {
       BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_offset, Primitive::kPrimInt, true);
+                      dex_pc, Primitive::kPrimInt, true);
       break;
     }
 
     case Instruction::NEW_INSTANCE: {
       current_block_->AddInstruction(
-          new (arena_) HNewInstance(dex_offset, instruction.VRegB_21c()));
+          new (arena_) HNewInstance(dex_pc, instruction.VRegB_21c()));
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
     }
@@ -1255,7 +1303,7 @@
     case Instruction::NEW_ARRAY: {
       HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
       current_block_->AddInstruction(
-          new (arena_) HNewArray(length, dex_offset, instruction.VRegC_22c()));
+          new (arena_) HNewArray(length, dex_pc, instruction.VRegC_22c()));
       UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
       break;
     }
@@ -1265,7 +1313,7 @@
       uint32_t type_index = instruction.VRegB_35c();
       uint32_t args[5];
       instruction.GetVarArgs(args);
-      BuildFilledNewArray(dex_offset, type_index, number_of_vreg_arguments, false, args, 0);
+      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
       break;
     }
 
@@ -1274,12 +1322,12 @@
       uint32_t type_index = instruction.VRegB_3rc();
       uint32_t register_index = instruction.VRegC_3rc();
       BuildFilledNewArray(
-          dex_offset, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
       break;
     }
 
     case Instruction::FILL_ARRAY_DATA: {
-      BuildFillArrayData(instruction, dex_offset);
+      BuildFillArrayData(instruction, dex_pc);
       break;
     }
 
@@ -1305,7 +1353,7 @@
     case Instruction::IGET_BYTE:
     case Instruction::IGET_CHAR:
     case Instruction::IGET_SHORT: {
-      if (!BuildInstanceFieldAccess(instruction, dex_offset, false)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
         return false;
       }
       break;
@@ -1318,7 +1366,7 @@
     case Instruction::IPUT_BYTE:
     case Instruction::IPUT_CHAR:
     case Instruction::IPUT_SHORT: {
-      if (!BuildInstanceFieldAccess(instruction, dex_offset, true)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
         return false;
       }
       break;
@@ -1331,7 +1379,7 @@
     case Instruction::SGET_BYTE:
     case Instruction::SGET_CHAR:
     case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_offset, false)) {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
         return false;
       }
       break;
@@ -1344,7 +1392,7 @@
     case Instruction::SPUT_BYTE:
     case Instruction::SPUT_CHAR:
     case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_offset, true)) {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
         return false;
       }
       break;
@@ -1352,11 +1400,11 @@
 
 #define ARRAY_XX(kind, anticipated_type)                                          \
     case Instruction::AGET##kind: {                                               \
-      BuildArrayAccess(instruction, dex_offset, false, anticipated_type);         \
+      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);         \
       break;                                                                      \
     }                                                                             \
     case Instruction::APUT##kind: {                                               \
-      BuildArrayAccess(instruction, dex_offset, true, anticipated_type);          \
+      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);          \
       break;                                                                      \
     }
 
@@ -1372,7 +1420,7 @@
       HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot);
       // No need for a temporary for the null check, it is the only input of the following
       // instruction.
-      object = new (arena_) HNullCheck(object, dex_offset);
+      object = new (arena_) HNullCheck(object, dex_pc);
       current_block_->AddInstruction(object);
       current_block_->AddInstruction(new (arena_) HArrayLength(object));
       UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
@@ -1380,13 +1428,13 @@
     }
 
     case Instruction::CONST_STRING: {
-      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_21c(), dex_offset));
+      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_21c(), dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
 
     case Instruction::CONST_STRING_JUMBO: {
-      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_31c(), dex_offset));
+      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_31c(), dex_pc));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
       break;
     }
@@ -1403,7 +1451,7 @@
         return false;
       }
       current_block_->AddInstruction(
-          new (arena_) HLoadClass(type_index, is_referrers_class, dex_offset));
+          new (arena_) HLoadClass(type_index, is_referrers_class, dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
@@ -1416,7 +1464,7 @@
 
     case Instruction::THROW: {
       HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot);
-      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_offset));
+      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_pc));
       // A throw instruction must branch to the exit block.
       current_block_->AddSuccessor(exit_block_);
       // We finished building this block. Set the current block to null to avoid
@@ -1429,7 +1477,7 @@
       uint8_t destination = instruction.VRegA_22c();
       uint8_t reference = instruction.VRegB_22c();
       uint16_t type_index = instruction.VRegC_22c();
-      if (!BuildTypeCheck(instruction, destination, reference, type_index, dex_offset)) {
+      if (!BuildTypeCheck(instruction, destination, reference, type_index, dex_pc)) {
         return false;
       }
       break;
@@ -1438,7 +1486,7 @@
     case Instruction::CHECK_CAST: {
       uint8_t reference = instruction.VRegA_21c();
       uint16_t type_index = instruction.VRegB_21c();
-      if (!BuildTypeCheck(instruction, -1, reference, type_index, dex_offset)) {
+      if (!BuildTypeCheck(instruction, -1, reference, type_index, dex_pc)) {
         return false;
       }
       break;
@@ -1448,7 +1496,7 @@
       current_block_->AddInstruction(new (arena_) HMonitorOperation(
           LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
           HMonitorOperation::kEnter,
-          dex_offset));
+          dex_pc));
       break;
     }
 
@@ -1456,7 +1504,7 @@
       current_block_->AddInstruction(new (arena_) HMonitorOperation(
           LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
           HMonitorOperation::kExit,
-          dex_offset));
+          dex_pc));
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 9cf8305..799e628 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -76,7 +76,7 @@
   // Analyzes the dex instruction and adds HInstruction to the graph
   // to execute that instruction. Returns whether the instruction can
   // be handled.
-  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_offset);
+  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc);
 
   // Finds all instructions that start a new block, and populates branch_targets_ with
   // the newly created blocks.
@@ -92,7 +92,7 @@
   HLocal* GetLocalAt(int register_index) const;
   void UpdateLocal(int register_index, HInstruction* instruction) const;
   HInstruction* LoadLocal(int register_index, Primitive::Type type) const;
-  void PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset);
+  void PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc);
   void InitializeParameters(uint16_t number_of_parameters);
 
   template<typename T>
@@ -102,16 +102,22 @@
   void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
   void Binop_22b(const Instruction& instruction, bool reverse);
 
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_offset);
-  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_offset);
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
 
   void Conversion_12x(const Instruction& instruction,
                       Primitive::Type input_type,
@@ -119,27 +125,27 @@
 
   void BuildCheckedDiv(uint16_t out_reg,
                        uint16_t first_reg,
-                       int32_t second_reg,  // can be a constant
-                       uint32_t dex_offset,
+                       int64_t second_reg_or_constant,
+                       uint32_t dex_pc,
                        Primitive::Type type,
                        bool second_is_lit);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
   // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_put);
+  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
   // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_put);
+  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
   void BuildArrayAccess(const Instruction& instruction,
-                        uint32_t dex_offset,
+                        uint32_t dex_pc,
                         bool is_get,
                         Primitive::Type anticipated_type);
 
   // Builds an invocation node and returns whether the instruction is supported.
   bool BuildInvoke(const Instruction& instruction,
-                   uint32_t dex_offset,
+                   uint32_t dex_pc,
                    uint32_t method_idx,
                    uint32_t number_of_vreg_arguments,
                    bool is_range,
@@ -147,14 +153,14 @@
                    uint32_t register_index);
 
   // Builds a new array node and the instructions that fill it.
-  void BuildFilledNewArray(uint32_t dex_offset,
+  void BuildFilledNewArray(uint32_t dex_pc,
                            uint32_t type_index,
                            uint32_t number_of_vreg_arguments,
                            bool is_range,
                            uint32_t* args,
                            uint32_t register_index);
 
-  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_offset);
+  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
 
   // Fills the given object with data as specified in the fill-array-data
   // instruction. Currently only used for non-reference and non-floating point
@@ -164,14 +170,14 @@
                           const T* data,
                           uint32_t element_count,
                           Primitive::Type anticipated_type,
-                          uint32_t dex_offset);
+                          uint32_t dex_pc);
 
   // Fills the given object with data as specified in the fill-array-data
   // instruction. The data must be for long and double arrays.
   void BuildFillWideArrayData(HInstruction* object,
                               const int64_t* data,
                               uint32_t element_count,
-                              uint32_t dex_offset);
+                              uint32_t dex_pc);
 
   // Builds a `HInstanceOf`, or a `HCheckCast` instruction.
   // Returns whether we succeeded in building the instruction.
@@ -179,7 +185,7 @@
                       uint8_t destination,
                       uint8_t reference,
                       uint16_t type_index,
-                      uint32_t dex_offset);
+                      uint32_t dex_pc);
 
   ArenaAllocator* const arena_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6b5ec1d..4d71cb7 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -51,7 +51,7 @@
     MarkNotLeaf();
   }
   ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs()
-                     + GetGraph()->GetNumberOfTemporaries()
+                     + GetGraph()->GetTemporariesVRegSlots()
                      + 1 /* filler */,
                    0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
@@ -150,12 +150,15 @@
 
 Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
   uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
+  // The type of the previous instruction tells us if we need a single or double stack slot.
+  Primitive::Type type = temp->GetType();
+  int32_t temp_size = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble) ? 2 : 1;
   // Use the temporary region (right below the dex registers).
   int32_t slot = GetFrameSize() - FrameEntrySpillSize()
                                 - kVRegSize  // filler
                                 - (number_of_locals * kVRegSize)
-                                - ((1 + temp->GetIndex()) * kVRegSize);
-  return Location::StackSlot(slot);
+                                - ((temp_size + temp->GetIndex()) * kVRegSize);
+  return temp_size == 2 ? Location::DoubleStackSlot(slot) : Location::StackSlot(slot);
 }
 
 int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6f5a3cb..56cd75f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -41,7 +41,7 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
 static constexpr int kCurrentMethodStackOffset = 0;
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2 };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr SRegister kRuntimeParameterFpuRegisters[] = { };
@@ -670,13 +670,13 @@
       __ LoadSFromOffset(destination.As<SRegister>(), SP, source.GetStackIndex());
     }
   } else {
-    DCHECK(destination.IsStackSlot());
+    DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
       __ StoreToOffset(kStoreWord, source.As<Register>(), SP, destination.GetStackIndex());
     } else if (source.IsFpuRegister()) {
       __ StoreSToOffset(source.As<SRegister>(), SP, destination.GetStackIndex());
     } else {
-      DCHECK(source.IsStackSlot());
+      DCHECK(source.IsStackSlot()) << source;
       __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
@@ -778,26 +778,29 @@
     return;
   }
 
-  if (instruction->IsIntConstant()) {
-    int32_t value = instruction->AsIntConstant()->GetValue();
-    if (location.IsRegister()) {
-      __ LoadImmediate(location.As<Register>(), value);
-    } else {
-      DCHECK(location.IsStackSlot());
-      __ LoadImmediate(IP, value);
-      __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-    }
-  } else if (instruction->IsLongConstant()) {
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegisterPair()) {
-      __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
-      __ LoadImmediate(location.AsRegisterPairHigh<Register>(), High32Bits(value));
-    } else {
-      DCHECK(location.IsDoubleStackSlot());
-      __ LoadImmediate(IP, Low32Bits(value));
-      __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-      __ LoadImmediate(IP, High32Bits(value));
-      __ StoreToOffset(kStoreWord, IP, SP, location.GetHighStackIndex(kArmWordSize));
+  if (locations != nullptr && locations->Out().IsConstant()) {
+    HConstant* const_to_move = locations->Out().GetConstant();
+    if (const_to_move->IsIntConstant()) {
+      int32_t value = const_to_move->AsIntConstant()->GetValue();
+      if (location.IsRegister()) {
+        __ LoadImmediate(location.As<Register>(), value);
+      } else {
+        DCHECK(location.IsStackSlot());
+        __ LoadImmediate(IP, value);
+        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
+      }
+    } else if (const_to_move->IsLongConstant()) {
+      int64_t value = const_to_move->AsLongConstant()->GetValue();
+      if (location.IsRegisterPair()) {
+        __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
+        __ LoadImmediate(location.AsRegisterPairHigh<Register>(), High32Bits(value));
+      } else {
+        DCHECK(location.IsDoubleStackSlot());
+        __ LoadImmediate(IP, Low32Bits(value));
+        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
+        __ LoadImmediate(IP, High32Bits(value));
+        __ StoreToOffset(kStoreWord, IP, SP, location.GetHighStackIndex(kArmWordSize));
+      }
     }
   } else if (instruction->IsLoadLocal()) {
     uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
@@ -822,7 +825,12 @@
     }
   } else if (instruction->IsTemporary()) {
     Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    Move32(location, temp_location);
+    if (temp_location.IsStackSlot()) {
+      Move32(location, temp_location);
+    } else {
+      DCHECK(temp_location.IsDoubleStackSlot());
+      Move64(location, temp_location);
+    }
   } else {
     DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
@@ -1693,8 +1701,11 @@
 }
 
 void LocationsBuilderARM::VisitDiv(HDiv* div) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -1703,7 +1714,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper returns the 64-bit quotient in R0,R1 (low, high).
+      locations->SetOut(Location::RegisterPairLocation(R0, R1));
       break;
     }
     case Primitive::kPrimFloat:
@@ -1732,7 +1749,15 @@
     }
 
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
+
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc());
       break;
     }
 
@@ -1756,7 +1781,7 @@
 void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -1769,9 +1794,36 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  DCHECK(value.IsRegister()) << value;
-  __ cmp(value.As<Register>(), ShifterOperand(0));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ cmp(value.As<Register>(), ShifterOperand(0));
+        __ b(slow_path->GetEntryLabel(), EQ);
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ b(slow_path->GetEntryLabel());  // Known-zero constant: branch unconditionally.
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (value.IsRegisterPair()) {
+        __ orrs(IP,
+                value.AsRegisterPairLow<Register>(),
+                ShifterOperand(value.AsRegisterPairHigh<Register>()));
+        __ b(slow_path->GetEntryLabel(), EQ);  // Taken when low | high == 0.
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ b(slow_path->GetEntryLabel());  // Known-zero constant: branch unconditionally.
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+  }
 }
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 100f380..79445b6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -36,7 +36,7 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
@@ -575,7 +575,7 @@
       __ movss(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     }
   } else {
-    DCHECK(destination.IsStackSlot());
+    DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>());
     } else if (source.IsFpuRegister()) {
@@ -636,7 +636,7 @@
       LOG(FATAL) << "Unimplemented";
     }
   } else {
-    DCHECK(destination.IsDoubleStackSlot());
+    DCHECK(destination.IsDoubleStackSlot()) << destination;
     if (source.IsRegisterPair()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
@@ -662,31 +662,44 @@
 }
 
 void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
-  if (instruction->IsIntConstant()) {
-    Immediate imm(instruction->AsIntConstant()->GetValue());
-    if (location.IsRegister()) {
-      __ movl(location.As<Register>(), imm);
-    } else if (location.IsStackSlot()) {
-      __ movl(Address(ESP, location.GetStackIndex()), imm);
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
-    }
-  } else if (instruction->IsLongConstant()) {
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegister()) {
-      __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
-      __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
-    } else if (location.IsDoubleStackSlot()) {
-      __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
-      __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  }
+
+  if (locations != nullptr && locations->Out().IsConstant()) {
+    HConstant* const_to_move = locations->Out().GetConstant();
+    if (const_to_move->IsIntConstant()) {
+      Immediate imm(const_to_move->AsIntConstant()->GetValue());
+      if (location.IsRegister()) {
+        __ movl(location.As<Register>(), imm);
+      } else if (location.IsStackSlot()) {
+        __ movl(Address(ESP, location.GetStackIndex()), imm);
+      } else {
+        DCHECK(location.IsConstant());
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
+    } else if (const_to_move->IsLongConstant()) {
+      int64_t value = const_to_move->AsLongConstant()->GetValue();
+      if (location.IsRegisterPair()) {
+        __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
+        __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
+      } else if (location.IsDoubleStackSlot()) {
+        __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
+        __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
+      } else {
+        DCHECK(location.IsConstant());
+        DCHECK_EQ(location.GetConstant(), const_to_move);  // Same check as the int case.
+      }
+    }
   } else if (instruction->IsTemporary()) {
     Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    Move32(location, temp_location);
+    if (temp_location.IsStackSlot()) {
+      Move32(location, temp_location);
+    } else {
+      DCHECK(temp_location.IsDoubleStackSlot());
+      Move64(location, temp_location);
+    }
   } else if (instruction->IsLoadLocal()) {
     int slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
     switch (instruction->GetType()) {
@@ -718,12 +731,12 @@
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
       case Primitive::kPrimFloat:
-        Move32(location, instruction->GetLocations()->Out());
+        Move32(location, locations->Out());
         break;
 
       case Primitive::kPrimLong:
       case Primitive::kPrimDouble:
-        Move64(location, instruction->GetLocations()->Out());
+        Move64(location, locations->Out());
         break;
 
       default:
@@ -1661,8 +1674,11 @@
 }
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RegisterLocation(EAX));
@@ -1673,7 +1689,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // Runtime helper puts the result in EAX, EDX.
+      locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
       break;
     }
     case Primitive::kPrimFloat:
@@ -1691,12 +1713,13 @@
 
 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
   LocationSummary* locations = div->GetLocations();
+  Location out = locations->Out();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
 
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(out));
       Register first_reg = first.As<Register>();
       Register second_reg = second.As<Register>();
       DCHECK_EQ(EAX, first_reg);
@@ -1723,16 +1746,28 @@
     }
 
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
+
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLdiv)));
+      codegen_->RecordPcInfo(div, div->GetDexPc());
+
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(out));
       __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(out));
       __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
@@ -1745,7 +1780,21 @@
 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::Any());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::Any());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+      if (!instruction->InputAt(0)->IsConstant()) {
+        locations->AddTemp(Location::RequiresRegister());  // Scratch for low | high.
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -1758,18 +1807,39 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  if (value.IsRegister()) {
-    __ testl(value.As<Register>(), value.As<Register>());
-  } else if (value.IsStackSlot()) {
-    __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
-  } else {
-    DCHECK(value.IsConstant()) << value;
-    if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
-    __ jmp(slow_path->GetEntryLabel());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ testl(value.As<Register>(), value.As<Register>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsStackSlot()) {
+        __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());  // Known-zero constant: jump unconditionally.
+        }
+      }
+      break;
     }
-    return;
+    case Primitive::kPrimLong: {
+      if (value.IsRegisterPair()) {
+        Register temp = locations->GetTemp(0).As<Register>();
+        __ movl(temp, value.AsRegisterPairLow<Register>());
+        __ orl(temp, value.AsRegisterPairHigh<Register>());
+        __ j(kEqual, slow_path->GetEntryLabel());  // Taken when low | high == 0.
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());  // Known-zero constant: jump unconditionally.
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   }
-  __ j(kEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6f3b161..300daa3 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -108,16 +108,23 @@
 
 class DivMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  explicit DivMinusOneSlowPathX86_64(Register reg) : reg_(reg) {}
+  explicit DivMinusOneSlowPathX86_64(Register reg, Primitive::Type type)
+      : reg_(reg), type_(type) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ negl(CpuRegister(reg_));
+    if (type_ == Primitive::kPrimInt) {
+      __ negl(CpuRegister(reg_));  // int operand: negate reg_ in place.
+    } else {
+      DCHECK_EQ(Primitive::kPrimLong, type_);
+      __ negq(CpuRegister(reg_));  // long operand: negate reg_ in place.
+    }
     __ jmp(GetExitLabel());
   }
 
  private:
-  Register reg_;
+  const Register reg_;  // Register negated in place by EmitNativeCode.
+  const Primitive::Type type_;  // kPrimInt or kPrimLong; selects negl vs. negq.
   DISALLOW_COPY_AND_ASSIGN(DivMinusOneSlowPathX86_64);
 };
 
@@ -577,26 +584,34 @@
 void CodeGeneratorX86_64::Move(HInstruction* instruction,
                                Location location,
                                HInstruction* move_for) {
-  if (instruction->IsIntConstant()) {
-    Immediate imm(instruction->AsIntConstant()->GetValue());
-    if (location.IsRegister()) {
-      __ movl(location.As<CpuRegister>(), imm);
-    } else if (location.IsStackSlot()) {
-      __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
-    }
-  } else if (instruction->IsLongConstant()) {
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegister()) {
-      __ movq(location.As<CpuRegister>(), Immediate(value));
-    } else if (location.IsDoubleStackSlot()) {
-      __ movq(CpuRegister(TMP), Immediate(value));
-      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations != nullptr && locations->Out().Equals(location)) {
+    return;  // The output already sits in the requested location.
+  }
+
+  if (locations != nullptr && locations->Out().IsConstant()) {
+    HConstant* const_to_move = locations->Out().GetConstant();
+    if (const_to_move->IsIntConstant()) {
+      Immediate imm(const_to_move->AsIntConstant()->GetValue());
+      if (location.IsRegister()) {
+        __ movl(location.As<CpuRegister>(), imm);
+      } else if (location.IsStackSlot()) {
+        __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
+      } else {
+        DCHECK(location.IsConstant());  // Constant destination: no code to emit.
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
+    } else if (const_to_move->IsLongConstant()) {
+      int64_t value = const_to_move->AsLongConstant()->GetValue();
+      if (location.IsRegister()) {
+        __ movq(location.As<CpuRegister>(), Immediate(value));
+      } else if (location.IsDoubleStackSlot()) {
+        __ movq(CpuRegister(TMP), Immediate(value));  // Stage in TMP, then store.
+        __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
+      } else {
+        DCHECK(location.IsConstant());  // Constant destination: no code to emit.
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
     }
   } else if (instruction->IsLoadLocal()) {
     switch (instruction->GetType()) {
@@ -633,7 +648,7 @@
       case Primitive::kPrimLong:
       case Primitive::kPrimFloat:
       case Primitive::kPrimDouble:
-        Move(location, instruction->GetLocations()->Out());
+        Move(location, locations->Out());
         break;
 
       default:
@@ -1608,7 +1623,8 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RegisterLocation(RAX));
       locations->SetInAt(1, Location::RequiresRegister());
       locations->SetOut(Location::SameAsFirstInput());
@@ -1616,10 +1632,7 @@
       locations->AddTemp(Location::RegisterLocation(RDX));
       break;
     }
-    case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
-      break;
-    }
+
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -1639,38 +1652,42 @@
   Location second = locations->InAt(1);
   DCHECK(first.Equals(locations->Out()));
 
-  switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
+  Primitive::Type type = div->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
       CpuRegister first_reg = first.As<CpuRegister>();
       CpuRegister second_reg = second.As<CpuRegister>();
       DCHECK_EQ(RAX,  first_reg.AsRegister());
       DCHECK_EQ(RDX, locations->GetTemp(0).As<CpuRegister>().AsRegister());
 
       SlowPathCodeX86_64* slow_path =
-          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86_64(first_reg.AsRegister());
+          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86_64(first_reg.AsRegister(), type);
       codegen_->AddSlowPath(slow_path);
 
-      // 0x80000000/-1 triggers an arithmetic exception!
-      // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
-      // it's safe to just use negl instead of more complex comparisons.
-
-      __ cmpl(second_reg, Immediate(-1));
-      __ j(kEqual, slow_path->GetEntryLabel());
-
-      // edx:eax <- sign-extended of eax
-      __ cdq();
-      // eax = quotient, edx = remainder
-      __ idivl(second_reg);
+      // 0x80000000(00000000)/-1 triggers an arithmetic exception! Dividing by -1 is negation
+      // and -0x80000000(00000000) = 0x80000000(00000000), so the slow path just negates.
+      // The -1 test must cover the full divisor width: a long like 0xFFFFFFFF is not -1.
+      if (type == Primitive::kPrimInt) {
+        __ cmpl(second_reg, Immediate(-1));
+        __ j(kEqual, slow_path->GetEntryLabel());
+        // edx:eax <- sign-extended of eax
+        __ cdq();
+        // eax = quotient, edx = remainder
+        __ idivl(second_reg);
+      } else {
+        __ cmpq(second_reg, Immediate(-1));
+        __ j(kEqual, slow_path->GetEntryLabel());
+        // rdx:rax <- sign-extended of rax
+        __ cqo();
+        // rax = quotient, rdx = remainder
+        __ idivq(second_reg);
+      }
 
       __ Bind(slow_path->GetExitLabel());
       break;
     }
 
-    case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
-      break;
-    }
-
     case Primitive::kPrimFloat: {
       __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
@@ -1703,18 +1720,40 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  if (value.IsRegister()) {
-    __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
-  } else if (value.IsStackSlot()) {
-    __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
-  } else {
-    DCHECK(value.IsConstant()) << value;
-    if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
-      __ jmp(slow_path->GetEntryLabel());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsStackSlot()) {
+        __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());  // Constant zero: branch unconditionally.
+        }
+      }
+      break;
     }
-    return;
+    case Primitive::kPrimLong: {
+      if (value.IsRegister()) {
+        __ testq(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsDoubleStackSlot()) {
+        __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());  // Constant zero: branch unconditionally.
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   }
-  __ j(kEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6224a11..5af3cdd 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -90,7 +90,7 @@
         maximum_number_of_out_vregs_(0),
         number_of_vregs_(0),
         number_of_in_vregs_(0),
-        number_of_temporaries_(0),
+        temporaries_vreg_slots_(0),
         current_instruction_id_(0) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
@@ -129,12 +129,12 @@
     maximum_number_of_out_vregs_ = std::max(new_value, maximum_number_of_out_vregs_);
   }
 
-  void UpdateNumberOfTemporaries(size_t count) {
-    number_of_temporaries_ = std::max(count, number_of_temporaries_);
+  void UpdateTemporariesVRegSlots(size_t slots) {
+    temporaries_vreg_slots_ = std::max(slots, temporaries_vreg_slots_);
   }
 
-  size_t GetNumberOfTemporaries() const {
-    return number_of_temporaries_;
+  size_t GetTemporariesVRegSlots() const {
+    return temporaries_vreg_slots_;
   }
 
   void SetNumberOfVRegs(uint16_t number_of_vregs) {
@@ -192,8 +192,8 @@
   // The number of virtual registers used by parameters of this method.
   uint16_t number_of_in_vregs_;
 
-  // The number of temporaries that will be needed for the baseline compiler.
-  size_t number_of_temporaries_;
+  // Number of vreg size slots that the temporaries use (used in baseline compiler).
+  size_t temporaries_vreg_slots_;
 
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int current_instruction_id_;
@@ -1750,8 +1750,8 @@
 
 class HDiv : public HBinaryOperation {
  public:
-  HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right)
-      : HBinaryOperation(result_type, left, right) {}
+  HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc)
+      : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {}
 
   virtual int32_t Evaluate(int32_t x, int32_t y) const {
     // Our graph structure ensures we never have 0 for `y` during constant folding.
@@ -1761,9 +1761,13 @@
   }
   virtual int64_t Evaluate(int64_t x, int64_t y) const { return x / y; }
 
+  uint32_t GetDexPc() const { return dex_pc_; }
+
   DECLARE_INSTRUCTION(Div);
 
  private:
+  const uint32_t dex_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
@@ -2158,8 +2162,8 @@
  * Some DEX instructions are folded into multiple HInstructions that need
  * to stay live until the last HInstruction. This class
  * is used as a marker for the baseline compiler to ensure its preceding
- * HInstruction stays live. `index` is the temporary number that is used
- * for knowing the stack offset where to store the instruction.
+ * HInstruction stays live. `index` represents the stack location index of the
+ * instruction (the actual offset is computed as index * vreg_size).
  */
 class HTemporary : public HTemplateInstruction<0> {
  public:
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 3d81362..ba4be34 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -713,7 +713,7 @@
   graph->AddBlock(block);
   entry->AddSuccessor(block);
 
-  *div = new (allocator) HDiv(Primitive::kPrimInt, first, second);
+  *div = new (allocator) HDiv(Primitive::kPrimInt, first, second, 0);  // don't care about dex_pc.
   block->AddInstruction(*div);
 
   block->AddInstruction(new (allocator) HExit());
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 5d1c9af..bd08b1f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -909,6 +909,21 @@
 }
 
 
+void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(reg, address);
+  EmitUint8(0x39);  // Opcode 0x39: CMP r/m32, r32.
+  EmitOperand(reg.LowBits(), address);
+}
+
+
+void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(address);
+  EmitComplex(7, address, imm);  // 7 is presumably the /7 (CMP) opcode extension.
+}
+
+
 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg0, reg1);
@@ -933,6 +948,14 @@
 }
 
 
+void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
+  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(address);  // REX.W prefix: 64-bit operand size.
+  EmitComplex(7, address, imm);  // 7 is presumably the /7 (CMP) opcode extension.
+}
+
+
 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -949,21 +972,6 @@
 }
 
 
-void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(reg, address);
-  EmitUint8(0x39);
-  EmitOperand(reg.LowBits(), address);
-}
-
-
-void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(address);
-  EmitComplex(7, address, imm);
-}
-
-
 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg1, reg2);
@@ -998,6 +1006,14 @@
 }
 
 
+void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg1, reg2);  // REX.W prefix: 64-bit operand size.
+  EmitUint8(0x85);  // Opcode 0x85: TEST r/m, r.
+  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
+}
+
+
 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg);
@@ -1267,6 +1283,13 @@
 }
 
 
+void X86_64Assembler::cqo() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64();  // REX.W: widens opcode 0x99 from CDQ to CQO.
+  EmitUint8(0x99);
+}
+
+
 void X86_64Assembler::idivl(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
@@ -1275,6 +1298,14 @@
 }
 
 
+void X86_64Assembler::idivq(CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);  // REX.W prefix: 64-bit operand size.
+  EmitUint8(0xF7);  // Opcode 0xF7: unary group; the ModRM reg field picks the operation.
+  EmitUint8(0xF8 | reg.LowBits());  // ModRM: mod=11 (register), reg=7 (IDIV), rm=reg.
+}
+
+
 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1820,10 +1851,20 @@
   }
 }
 
+void X86_64Assembler::EmitRex64() {
+  EmitOptionalRex(false, true, false, false, false);  // Presumably W only - TODO confirm.
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister reg) {
   EmitOptionalRex(false, true, false, false, reg.NeedsRex());
 }
 
+void X86_64Assembler::EmitRex64(const Operand& operand) {
+  uint8_t rex = operand.rex();  // Presumably the operand's own REX bits - TODO confirm.
+  rex |= 0x48;  // REX.W000
+  EmitUint8(rex);
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 285b4cf..b46f6f7 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -390,10 +390,12 @@
   void cmpq(CpuRegister reg0, CpuRegister reg1);
   void cmpq(CpuRegister reg0, const Immediate& imm);
   void cmpq(CpuRegister reg0, const Address& address);
+  void cmpq(const Address& address, const Immediate& imm);
 
   void testl(CpuRegister reg1, CpuRegister reg2);
   void testl(CpuRegister reg, const Immediate& imm);
 
+  void testq(CpuRegister reg1, CpuRegister reg2);
   void testq(CpuRegister reg, const Address& address);
 
   void andl(CpuRegister dst, const Immediate& imm);
@@ -432,8 +434,10 @@
   void subq(CpuRegister dst, const Address& address);
 
   void cdq();
+  void cqo();
 
   void idivl(CpuRegister reg);
+  void idivq(CpuRegister reg);
 
   void imull(CpuRegister dst, CpuRegister src);
   void imull(CpuRegister reg, const Immediate& imm);
@@ -669,7 +673,9 @@
   void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
 
   // Emit a REX.W prefix plus necessary register bit encodings.
+  void EmitRex64();
   void EmitRex64(CpuRegister reg);
+  void EmitRex64(const Operand& operand);
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index cdf48c3..f1f1a56 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -107,6 +107,10 @@
           "  --no-disassemble may be used to disable disassembly.\n"
           "      Example: --no-disassemble\n"
           "\n");
+  fprintf(stderr,
+          "  --method-filter=<method name>: only dumps methods that contain the filter.\n"
+          "      Example: --method-filter=foo\n"
+          "\n");
 }
 
 const char* image_roots_descriptions_[] = {
@@ -356,12 +360,14 @@
                    bool dump_vmap,
                    bool disassemble_code,
                    bool absolute_addresses,
+                   const char* method_filter,
                    Handle<mirror::ClassLoader>* class_loader)
     : dump_raw_mapping_table_(dump_raw_mapping_table),
       dump_raw_gc_map_(dump_raw_gc_map),
       dump_vmap_(dump_vmap),
       disassemble_code_(disassemble_code),
       absolute_addresses_(absolute_addresses),
+      method_filter_(method_filter),
       class_loader_(class_loader) {}
 
   const bool dump_raw_mapping_table_;
@@ -369,6 +375,7 @@
   const bool dump_vmap_;
   const bool disassemble_code_;
   const bool absolute_addresses_;
+  const char* const method_filter_;
   Handle<mirror::ClassLoader>* class_loader_;
 };
 
@@ -686,8 +693,13 @@
                      uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
                      uint32_t method_access_flags) {
     bool success = true;
+    std::string pretty_method = PrettyMethod(dex_method_idx, dex_file, true);
+    if (pretty_method.find(options_->method_filter_) == std::string::npos) {
+      return success;  // Name does not contain the filter: skip it, reporting success.
+    }
+
     os << StringPrintf("%d: %s (dex_method_idx=%d)\n",
-                       class_method_index, PrettyMethod(dex_method_idx, dex_file, true).c_str(),
+                       class_method_index, pretty_method.c_str(),
                        dex_method_idx);
     Indenter indent1_filter(os.rdbuf(), kIndentChar, kIndentBy1Count);
     std::unique_ptr<std::ostream> indent1_os(new std::ostream(&indent1_filter));
@@ -2179,6 +2191,8 @@
       } else if (option.starts_with("--symbolize=")) {
         oat_filename_ = option.substr(strlen("--symbolize=")).data();
         symbolize_ = true;
+      } else if (option.starts_with("--method-filter=")) {
+        method_filter_ = option.substr(strlen("--method-filter=")).data();
       } else {
         fprintf(stderr, "Unknown argument %s\n", option.data());
         usage();
@@ -2200,6 +2214,7 @@
   }
 
   const char* oat_filename_ = nullptr;
+  const char* method_filter_ = "";
   const char* image_location_ = nullptr;
   const char* boot_image_location_ = nullptr;
   InstructionSet instruction_set_ = kRuntimeISA;
@@ -2231,6 +2246,7 @@
       args.dump_vmap_,
       args.disassemble_code_,
       absolute_addresses,
+      args.method_filter_,
       nullptr));
 
   std::unique_ptr<Runtime> runtime;
diff --git a/test/417-optimizing-arith-div/src/Main.java b/test/417-optimizing-arith-div/src/Main.java
index 5825d24..a5dea15 100644
--- a/test/417-optimizing-arith-div/src/Main.java
+++ b/test/417-optimizing-arith-div/src/Main.java
@@ -78,18 +78,33 @@
     } catch (java.lang.RuntimeException e) {
     }
   }
+
+  public static void expectDivisionByZero(long value) {
+    try {
+      $opt$Div(value, 0L);
+      throw new Error("Expected RuntimeException when dividing by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$DivZero(value);
+      throw new Error("Expected RuntimeException when dividing by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
   public static void main(String[] args) {
     div();
   }
 
   public static void div() {
     divInt();
+    divLong();
     divFloat();
     divDouble();
   }
 
   private static void divInt() {
-    expectEquals(2, $opt$DivLit(6));
+    expectEquals(2, $opt$DivConst(6));
     expectEquals(2, $opt$Div(6, 3));
     expectEquals(6, $opt$Div(6, 1));
     expectEquals(-2, $opt$Div(6, -3));
@@ -111,6 +126,35 @@
     expectDivisionByZero(Integer.MIN_VALUE);
   }
 
+  private static void divLong() {
+    expectEquals(2L, $opt$DivConst(6L));
+    expectEquals(2L, $opt$Div(6L, 3L));
+    expectEquals(6L, $opt$Div(6L, 1L));
+    expectEquals(-2L, $opt$Div(6L, -3L));
+    expectEquals(1L, $opt$Div(4L, 3L));
+    expectEquals(-1L, $opt$Div(4L, -3L));
+    expectEquals(5L, $opt$Div(23L, 4L));
+    expectEquals(-5L, $opt$Div(-23L, 4L));
+
+    expectEquals(-Integer.MAX_VALUE, $opt$Div(Integer.MAX_VALUE, -1L));
+    expectEquals(2147483648L, $opt$Div(Integer.MIN_VALUE, -1L));
+    expectEquals(-1073741824L, $opt$Div(Integer.MIN_VALUE, 2L));
+
+    expectEquals(-Long.MAX_VALUE, $opt$Div(Long.MAX_VALUE, -1L));
+    expectEquals(Long.MIN_VALUE, $opt$Div(Long.MIN_VALUE, -1L)); // overflow
+
+    expectEquals(11111111111111L, $opt$Div(33333333333333L, 3L));
+    expectEquals(3L, $opt$Div(33333333333333L, 11111111111111L));
+
+    expectEquals(0L, $opt$Div(0L, Long.MAX_VALUE));
+    expectEquals(0L, $opt$Div(0L, Long.MIN_VALUE));
+
+    expectDivisionByZero(0L);
+    expectDivisionByZero(1L);
+    expectDivisionByZero(Long.MAX_VALUE);
+    expectDivisionByZero(Long.MIN_VALUE);
+  }
+
   private static void divFloat() {
     expectApproxEquals(1.6666666F, $opt$Div(5F, 3F));
     expectApproxEquals(0F, $opt$Div(0F, 3F));
@@ -178,10 +222,22 @@
   }
 
   // Division by literals != 0 should not generate checks.
-  static int $opt$DivLit(int a) {
+  static int $opt$DivConst(int a) {
     return a / 3;
   }
 
+  static long $opt$DivConst(long a) {
+    return a / 3L;
+  }
+
+  static long $opt$Div(long a, long b) {
+    return a / b;
+  }
+
+  static long $opt$DivZero(long a) {
+    return a / 0L;
+  }
+
   static float $opt$Div(float a, float b) {
     return a / b;
   }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index a6f31b4..3b949d6 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -286,8 +286,7 @@
 TEST_ART_BROKEN_NDEBUG_TESTS :=
 
 # Known broken tests for the default compiler (Quick).
-TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  412-new-array
+TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
 
 ifneq (,$(filter default,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -317,6 +316,7 @@
   010-instance \
   011-array-copy \
   013-math2 \
+  014-math3 \
   016-intern \
   017-float \
   018-stack-overflow \
@@ -449,6 +449,7 @@
   424-checkcast \
   426-monitor \
   427-bitwise \
+  427-bounds \
   700-LoadArgRegs \
   701-easy-div-rem \
   702-LargeBranchOffset \