Merge "Revert "Don't use the compiler driver for method resolution.""
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 564bd7e..0ed843b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -69,6 +69,7 @@
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a98d9c6..76bf951 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -724,7 +724,9 @@
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -1922,10 +1924,18 @@
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -5818,16 +5828,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   if (desired_dispatch_info.code_ptr_location ==
       HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5850,6 +5850,32 @@
   return desired_dispatch_info;
 }
 
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   switch (invoke->GetCodePtrLocation()) {
@@ -5881,11 +5907,15 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
@@ -5970,7 +6000,11 @@
 
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5996,6 +6030,28 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const auto& pair : dex_cache_arrays_base_labels_) {
+    HArmDexCacheArraysBase* base = pair.first;
+    const DexCacheArraysBaseLabels* labels = &pair.second;
+    const DexFile& dex_file = base->GetDexFile();
+    size_t base_element_offset = base->GetElementOffset();
+    DCHECK(labels->add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(labels->movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(labels->movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -6107,6 +6163,23 @@
   }
 }
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+  codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 89de4f8..193add2 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -373,6 +373,31 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
+  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
+  struct DexCacheArraysBaseLabels {
+    DexCacheArraysBaseLabels() = default;
+    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+    DexCacheArraysBaseLabels labels;
+    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+  }
+
+  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+    auto it = dex_cache_arrays_base_labels_.find(base);
+    DCHECK(it != dex_cache_arrays_base_labels_.end());
+    return &it->second;
+  }
+
   // Generate a read barrier for a heap reference within `instruction`.
   //
   // A read barrier for an object reference read from the heap is
@@ -419,7 +444,12 @@
   void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
 
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+                                                     DexCacheArraysBaseLabels,
+                                                     std::less<HArmDexCacheArraysBase*>>;
 
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -441,6 +471,8 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
 
+  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
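Note: the MOVW/MOVT immediates in VisitArmDexCacheArraysBase() are emitted as zero
and filled in later through the DexCacheArrayPatch entries collected in
EmitLinkerPatches(). A minimal sketch of the arithmetic the patcher has to perform,
assuming a flat layout where the code and the dex cache arrays share one reference
point (the real relative patcher is more involved; the names below are made up for
the sketch):

    #include <cstdint>

    struct MovwMovtImmediates {
      uint16_t movw_imm16;  // Low half of the 32-bit displacement.
      uint16_t movt_imm16;  // High half of the 32-bit displacement.
    };

    // `anchor_offset` is the position of the ADD (add_pc_label); `target_offset`
    // is the start of the dex cache arrays plus base_element_offset.
    inline MovwMovtImmediates ComputeDexCacheArrayPatch(uint32_t anchor_offset,
                                                        uint32_t target_offset) {
      // On Thumb2 the ADD reads PC as the address of the ADD itself plus 4,
      // which is why the patch is anchored at add_pc_label.
      uint32_t displacement = target_offset - (anchor_offset + 4u);
      return MovwMovtImmediates{static_cast<uint16_t>(displacement & 0xffffu),
                                static_cast<uint16_t>(displacement >> 16)};
    }
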
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1fc09a8..a0d31da 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1929,8 +1929,7 @@
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
   if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(),
-                                    Location::RequiresRegister());
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 
   if (codegen_->IsBaseline()) {
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..6582063
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
+
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBasesIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
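
Note: GetOrCreateDexCacheArrayBase() above uses the single-lookup get-or-create
idiom: lower_bound() finds the insertion point, key_comp() tells whether the key
is already present, and PutBefore() reuses that position as an insertion hint.
The same idiom with a plain std::map, as a sketch:

    #include <map>
    #include <string>

    int* GetOrCreate(std::map<std::string, int>& m, const std::string& key) {
      auto lb = m.lower_bound(key);                     // First element >= key.
      if (lb != m.end() && !m.key_comp()(key, lb->first)) {
        return &lb->second;                             // Key already present.
      }
      auto it = m.emplace_hint(lb, key, 0);             // Amortized O(1) with hint.
      return &it->second;
    }
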
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000..015f910
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b5ac773..847d147 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1177,6 +1177,59 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  HUseIterator<HInstruction*> uses_it(GetUses());
+  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+  uses_it.Advance();
+  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+    uses_it.Advance();
+  }
+  if (!uses_it.Done()) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; !uses_it.Done(); uses_it.Advance()) {
+      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = uses_it2.Current()->GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
   DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
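
Note: MoveBeforeFirstUserAndOutOfLoops() above leans on a CommonDominator helper
to fold all user blocks into one dominating block. The underlying idea is a plain
dominator-chain intersection; a sketch, assuming a Block type whose depth field is
its dominator-tree depth (these names are stand-ins, not the ART API):

    struct Block {
      Block* dominator;  // Immediate dominator; null for the entry block.
      int depth;         // Depth in the dominator tree; entry block is 0.
    };

    // Walk the deeper block up its dominator chain until the chains meet.
    Block* CommonDominator(Block* a, Block* b) {
      while (a != b) {
        if (a->depth < b->depth) {
          b = b->dominator;
        } else {
          a = a->dominator;
        }
      }
      return a;
    }
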
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d5110a7..441aa04 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1095,7 +1095,12 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
@@ -1952,6 +1957,14 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the users' blocks.
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
@@ -5566,6 +5579,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
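
Note: FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) follows the X-macro pattern used
throughout nodes.h: each backend contributes a list that expands a caller-supplied
macro M once per instruction class, so the enums, visitor declarations and the
Is##type()/As##type() helpers (see INSTRUCTION_TYPE_CHECK above) all stay in sync
from a single list. A stand-alone illustration with made-up names:

    #define FOR_EACH_DEMO_INSTRUCTION(M) \
      M(Add)                             \
      M(ArmDexCacheArraysBase)

    // Expand the list into an enum...
    enum DemoInstructionKind {
    #define DEFINE_KIND(name) kDemo##name,
      FOR_EACH_DEMO_INSTRUCTION(DEFINE_KIND)
    #undef DEFINE_KIND
    };

    // ...and into forward declarations, from the same single source of truth.
    #define FORWARD_DECLARE(name) class HDemo##name;
    FOR_EACH_DEMO_INSTRUCTION(FORWARD_DECLARE)
    #undef FORWARD_DECLARE
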
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..6a1dbb9
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. This is expected to be rare enough though, so we choose
+    // not to spend compile time on elaborate calculations.)
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
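
Note: a worked example of the UpdateElementOffset() heuristic above. If the
requested method elements for one dex file sit at offsets 0x30, 0x1020 and 0x18,
the base is anchored at the minimum, 0x18, so the loads emitted in
GenerateStaticOrDirectCall() use the non-negative displacements 0x30 - 0x18 = 0x18
and 0x1020 - 0x18 = 0x1008. A tiny self-contained check of that arithmetic:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main() {
      size_t base = static_cast<size_t>(-1);  // Same sentinel as element_offset_.
      for (size_t offset : {0x30u, 0x1020u, 0x18u}) {
        base = std::min(base, static_cast<size_t>(offset));
      }
      assert(base == 0x18u);
      assert(0x1020u - base == 0x1008u);      // Displacement used by the load.
      return 0;
    }
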
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8440813..4ee7fca7 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,6 +20,10 @@
 #include <stdint.h>
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
@@ -435,6 +439,17 @@
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      HOptimization* arm_optimizations[] = {
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 808a1dc..b383f1e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -26,6 +26,15 @@
  public:
   explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
 
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
  private:
   void VisitAdd(HAdd* add) OVERRIDE {
     BinaryFP(add);
@@ -72,7 +81,7 @@
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializePCRelativeBasePointer(switch_insn);
+    InitializePCRelativeBasePointer();
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,22 +93,22 @@
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }
 
-  void InitializePCRelativeBasePointer(HInstruction* user) {
+  void InitializePCRelativeBasePointer() {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
       return;
     }
 
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
     DCHECK(base_ != nullptr);
   }
 
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializePCRelativeBasePointer(insn);
+    InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
         new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
@@ -111,7 +120,7 @@
     // addressing, we need the PC-relative address base.
     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
-      InitializePCRelativeBasePointer(invoke);
+      InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
       DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
       invoke_static_or_direct->AddSpecialInput(base_);
@@ -133,6 +142,7 @@
 void PcRelativeFixups::Run() {
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
 }
 
 }  // namespace x86
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 0d05c49..bcdbeec 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -127,6 +127,87 @@
   }
 }
 
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+  ArenaAllocatorAdapter<void> adapter =
+      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+  ArenaVector<HPhi*> visited_phis(adapter);
+  ArenaVector<HInstruction*> worklist(adapter);
+  worklist.push_back(root_instr);
+
+  while (!worklist.empty()) {
+    HInstruction* instr = worklist.back();
+    worklist.pop_back();
+
+    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+      // Expect that both `root_instr` and its inputs have invalid RTI.
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+      // Insert all unvisited inputs to the worklist.
+      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current();
+        if (input->IsPhi()) {
+          if (ContainsElement(visited_phis, input->AsPhi())) {
+            continue;
+          } else {
+            visited_phis.push_back(input->AsPhi());
+          }
+        }
+        worklist.push_back(input);
+      }
+    } else if (instr->IsNullConstant()) {
+      // A null constant is the only input of `root_instr` allowed to have valid RTI,
+      // because it is ignored.
+    } else {
+      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+          << instr->GetReferenceTypeInfo();
+      UNREACHABLE();
+    }
+  }
+}
+
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+  }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fix-point iteration will leave kPrimNot instructions with
+  // invalid RTI because bytecode does not provide enough typing information.
+  // Set the RTI of such instructions to Object.
+  // Example:
+  //   MyClass a = null, b = null;
+  //   while (a == null) {
+  //     if (cond) { a = b; } else { b = a; }
+  //   }
+
+  if (kIsDebugBuild) {
+    // Test that if we are going to set RTI from invalid to Object, that
+    // instruction did not have any typed instructions in its def-use chain
+    // and therefore its type could not be inferred.
+    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+  }
+
+  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+    instr->SetReferenceTypeInfo(obj_rti);
+  });
+}
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -136,6 +217,7 @@
   }
 
   ProcessWorklist();
+  SetUntypedInstructionsToObject();
   ValidateTypes();
 }
 
@@ -534,8 +616,9 @@
 void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
 void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -588,11 +671,16 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -683,7 +771,7 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
     UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
@@ -770,7 +858,10 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
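
Note: CheckHasNoTypedInputs() above is a depth-first worklist traversal of the
def-use chain that deduplicates only phis, since phis are the only nodes that can
form cycles (through loop headers). The generic shape of that traversal, sketched
with a stand-in Node type:

    #include <algorithm>
    #include <vector>

    struct Node {
      bool is_phi = false;            // Join nodes; may appear in cycles.
      std::vector<Node*> inputs;
    };

    template <typename Visit>
    void VisitTransitiveInputs(Node* root, Visit visit) {
      std::vector<Node*> visited_phis;
      std::vector<Node*> worklist{root};
      while (!worklist.empty()) {
        Node* node = worklist.back();
        worklist.pop_back();
        visit(node);
        for (Node* input : node->inputs) {
          if (input->is_phi) {
            if (std::find(visited_phis.begin(), visited_phis.end(), input) !=
                visited_phis.end()) {
              continue;               // Already scheduled once; break the cycle.
            }
            visited_phis.push_back(input);
          }
          worklist.push_back(input);
        }
      }
    }
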
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5c05592..21789e1 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -57,6 +57,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
+  void SetUntypedInstructionsToObject();
 
   StackHandleScopeCollection* handles_;
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 7ad5b44..cdeb443 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2569,30 +2569,19 @@
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
 
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 1de51a2..5ae2cc2 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -832,11 +832,12 @@
 TEST(Thumb2AssemblerTest, MovWMovT) {
   arm::Thumb2Assembler assembler;
 
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 9246c82..886295e 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -439,14 +439,14 @@
   nullptr
 };
 const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
 const char* SpecialAddSubResults[] = {
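
Note: the expected encodings above can be re-derived from the T3 logic in movw().
A stand-alone sketch with the assembler's B-constants written out as explicit
shifts (Bn is assumed to be 1u << n):

    #include <cassert>
    #include <cstdint>

    inline uint32_t EncodeMovwT3(uint32_t rd, uint16_t imm16) {
      uint32_t imm4 = (imm16 >> 12) & 0xfu;
      uint32_t i    = (imm16 >> 11) & 0x1u;
      uint32_t imm3 = (imm16 >> 8) & 0x7u;
      uint32_t imm8 = imm16 & 0xffu;
      return (1u << 31) | (1u << 30) | (1u << 29) | (1u << 28) |  // B31..B28
             (1u << 25) | (1u << 22) |                            // B25, B22
             (i << 26) | (imm4 << 16) | (imm3 << 12) | (rd << 8) | imm8;
    }

    int main() {
      // "f240 0400  movw r4, #0": the two halfwords of the 32-bit encoding.
      assert(EncodeMovwT3(4, 0x0000) == 0xf2400400u);
      // "f241 2334  movw r3, #4660 ; 0x1234".
      assert(EncodeMovwT3(3, 0x1234) == 0xf2412334u);
      return 0;
    }
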
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index ec63057..cea1d5a 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -618,6 +618,35 @@
     getSuper();
   }
 
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
+  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 47fc50f..c79f4b9 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -128,7 +128,7 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test