Improved induction variable analysis and loop optimizations.

Rationale:
Rather than half-baked reconstructing cycles during loop optimizations,
this CL passes the SCC computed during induction variable analysis
to the loop optimizer (trading some memory for more optimizations).
This further improves CaffeineLogic from 6000us down to 4200us (dx)
and 2200us to 1690us (jack). Note that this is on top of prior
improvements in previous CLs. Also, some narrowing type concerns
are taken care of during transfer operations.

Test: test-art-host
Change-Id: Ice2764811a70073c5014b3a05fb51f39fd2f4c3c
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 38937bf..1d1921a 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -23,12 +23,12 @@
  * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
  * along dependences, viz. any of (a, b, c, d), (d, a, b, c)  (c, d, a, b), (b, c, d, a) assuming
  * a chain of dependences (mutual independent items may occur in arbitrary order). For proper
- * classification, the lexicographically first entry-phi is rotated to the front.
+ * classification, the lexicographically first loop-phi is rotated to the front.
  */
 static void RotateEntryPhiFirst(HLoopInformation* loop,
                                 ArenaVector<HInstruction*>* scc,
                                 ArenaVector<HInstruction*>* new_scc) {
-  // Find very first entry-phi.
+  // Find very first loop-phi.
   const HInstructionList& phis = loop->GetHeader()->GetPhis();
   HInstruction* phi = nullptr;
   size_t phi_pos = -1;
@@ -41,7 +41,7 @@
     }
   }
 
-  // If found, bring that entry-phi to front.
+  // If found, bring that loop-phi to front.
   if (phi != nullptr) {
     new_scc->clear();
     for (size_t i = 0; i < size; i++) {
@@ -94,7 +94,9 @@
              graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
       type_(Primitive::kPrimVoid),
       induction_(std::less<HLoopInformation*>(),
-                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
+                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      cycles_(std::less<HPhi*>(),
+              graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
 }
 
 void HInductionVarAnalysis::Run() {
@@ -245,13 +247,13 @@
   const size_t size = scc_.size();
   DCHECK_GE(size, 1u);
 
-  // Rotate proper entry-phi to front.
+  // Rotate proper loop-phi to front.
   if (size > 1) {
     ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis));
     RotateEntryPhiFirst(loop, &scc_, &other);
   }
 
-  // Analyze from entry-phi onwards.
+  // Analyze from loop-phi onwards.
   HInstruction* phi = scc_[0];
   if (!phi->IsLoopHeaderPhi()) {
     return;
@@ -263,6 +265,9 @@
     return;
   }
 
+  // Store interesting cycle.
+  AssignCycle(phi->AsPhi());
+
   // Singleton is wrap-around induction if all internal links have the same meaning.
   if (size == 1) {
     InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
@@ -366,6 +371,7 @@
   // can be combined with an invariant to yield a similar result. Even two linear inputs can
   // be combined. All other combinations fail, however.
   if (a != nullptr && b != nullptr) {
+    type_ = Narrowest(type_, Narrowest(a->type, b->type));
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(op, a, b);
     } else if (a->induction_class == kLinear && b->induction_class == kLinear) {
@@ -402,6 +408,7 @@
   // can be multiplied with an invariant to yield a similar but multiplied result.
   // Two non-invariant inputs cannot be multiplied, however.
   if (a != nullptr && b != nullptr) {
+    type_ = Narrowest(type_, Narrowest(a->type, b->type));
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(kMul, a, b);
     } else if (a->induction_class == kInvariant) {
@@ -442,6 +449,7 @@
   // Transfer over a unary negation: an invariant, linear, wrap-around, or periodic input
   // yields a similar but negated induction as result.
   if (a != nullptr) {
+    type_ = Narrowest(type_, a->type);
     if (a->induction_class == kInvariant) {
       return CreateInvariantOp(kNeg, nullptr, a);
     }
@@ -941,6 +949,23 @@
   return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
 }
 
+
+void HInductionVarAnalysis::AssignCycle(HPhi* phi) {
+  ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>(
+      graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second;
+  for (HInstruction* i : scc_) {
+    set->insert(i);
+  }
+}
+
+ArenaSet<HInstruction*>* HInductionVarAnalysis::LookupCycle(HPhi* phi) {
+  auto it = cycles_.find(phi);
+  if (it != cycles_.end()) {
+    return &it->second;
+  }
+  return nullptr;
+}
+
 bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) {
   return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value);
 }
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index d190782..7027179 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -214,6 +214,8 @@
   InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction);
   InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
   InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
+  void AssignCycle(HPhi* phi);
+  ArenaSet<HInstruction*>* LookupCycle(HPhi* phi);
 
   // Constants.
   bool IsExact(InductionInfo* info, /*out*/ int64_t* value);
@@ -240,6 +242,11 @@
    */
   ArenaSafeMap<HLoopInformation*, ArenaSafeMap<HInstruction*, InductionInfo*>> induction_;
 
+  /**
+   * Preserves induction cycle information for each loop-phi.
+   */
+  ArenaSafeMap<HPhi*, ArenaSet<HInstruction*>> cycles_;
+
   friend class InductionVarAnalysisTest;
   friend class InductionVarRange;
   friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 7599c8f..031f1d7 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -740,6 +740,31 @@
   EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str());
 }
 
+TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) {
+  // Setup:
+  // for (int i = 0; i < 100; i++) {
+  //   k = (byte) i;
+  //   a[k] = 0;
+  //   k = k + 1
+  //   a[k] = 0;
+  // }
+  BuildLoopNest(1);
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+  HInstruction* store1 = InsertArrayStore(conv, 0);
+  HInstruction* add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
+  HInstruction* store2 = InsertArrayStore(add, 0);
+
+  PerformInductionVarAnalysis();
+
+  // Byte induction (k) is "transferred" over conversion into addition (k + 1).
+  // This means only values within byte range can be trusted (even though
+  // addition can jump out of the range of course).
+  EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str());
+}
+
 TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
   // Setup:
   // for (byte i = -128; i < 127; i++) {  // just fits!
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 2f70046..034cf32 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -136,10 +136,20 @@
    */
   void ReVisit(HLoopInformation* loop) {
     induction_analysis_->induction_.erase(loop);
+    for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) {
+      induction_analysis_->cycles_.erase(it.Current()->AsPhi());
+    }
     induction_analysis_->VisitLoop(loop);
   }
 
   /**
+   * Lookup an interesting cycle associated with an entry phi.
+   */
+  ArenaSet<HInstruction*>* LookupCycle(HPhi* phi) const {
+    return induction_analysis_->LookupCycle(phi);
+  }
+
+  /**
    * Checks if header logic of a loop terminates.
    */
   bool IsFinite(HLoopInformation* loop) const;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index b88e73b..51be1d1 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -20,82 +20,6 @@
 
 namespace art {
 
-// Detects a potential induction cycle. Note that the actual induction
-// information is queried later if its last value is really needed.
-static bool IsPhiInduction(HPhi* phi, ArenaSet<HInstruction*>* iset) {
-  DCHECK(iset->empty());
-  HInputsRef inputs = phi->GetInputs();
-  if (inputs.size() == 2) {
-    HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
-    HInstruction* op = inputs[1];
-    if (op->GetBlock()->GetLoopInformation() == loop_info) {
-      // Chase a simple chain back to phi.
-      while (!op->IsPhi()) {
-        // Binary operation with single use in same loop.
-        if (!op->IsBinaryOperation() || !op->GetUses().HasExactlyOneElement()) {
-          return false;
-        }
-        // Chase back either through left or right operand.
-        iset->insert(op);
-        HInstruction* a = op->InputAt(0);
-        HInstruction* b = op->InputAt(1);
-        if (a->GetBlock()->GetLoopInformation() == loop_info && b != phi) {
-          op = a;
-        } else if (b->GetBlock()->GetLoopInformation() == loop_info) {
-          op = b;
-        } else {
-          return false;
-        }
-      }
-      // Closed the cycle?
-      if (op == phi) {
-       iset->insert(phi);
-       return true;
-      }
-    }
-  }
-  return false;
-}
-
-// Find: phi: Phi(init, addsub)
-//       s:   SuspendCheck
-//       c:   Condition(phi, bound)
-//       i:   If(c)
-// TODO: Find a less pattern matching approach?
-static bool IsEmptyHeader(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
-  DCHECK(iset->empty());
-  HInstruction* phi = block->GetFirstPhi();
-  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi(), iset)) {
-    HInstruction* s = block->GetFirstInstruction();
-    if (s != nullptr && s->IsSuspendCheck()) {
-      HInstruction* c = s->GetNext();
-      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
-        HInstruction* i = c->GetNext();
-        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
-          iset->insert(c);
-          iset->insert(s);
-          return true;
-        }
-      }
-    }
-  }
-  return false;
-}
-
-// Does the loop-body consist of induction cycle and direct control flow only?
-static bool IsEmptyBody(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
-  if (block->GetFirstPhi() == nullptr) {
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instruction = it.Current();
-      if (!instruction->IsGoto() && iset->find(instruction) == iset->end()) {
-        return false;
-      }
-    }
-    return true;
-  }
-  return false;
-}
-
 // Remove the instruction from the graph. A bit more elaborate than the usual
 // instruction removal, since there may be a cycle in the use structure.
 static void RemoveFromCycle(HInstruction* instruction) {
@@ -242,7 +166,7 @@
     HPhi* phi = it.Current()->AsPhi();
     iset_->clear();
     int32_t use_count = 0;
-    if (IsPhiInduction(phi, iset_) &&
+    if (IsPhiInduction(phi) &&
         IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
         TryReplaceWithLastValue(phi, use_count, preheader)) {
       for (HInstruction* i : *iset_) {
@@ -256,15 +180,14 @@
 void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
   for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    // Remove instructions that are dead, usually resulting from eliminating induction cycles.
+    // Remove instructions that are dead.
     for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
       HInstruction* instruction = i.Current();
       if (instruction->IsDeadAndRemovable()) {
         block->RemoveInstruction(instruction);
       }
     }
-    // Remove trivial control flow blocks from the loop-body, again usually resulting
-    // from eliminating induction cycles.
+    // Remove trivial control flow blocks from the loop-body.
     if (block->GetPredecessors().size() == 1 &&
         block->GetSuccessors().size() == 1 &&
         block->GetFirstInstruction()->IsGoto()) {
@@ -314,8 +237,8 @@
   // subsequent index uses, if any, with the last value and remove the loop.
   iset_->clear();
   int32_t use_count = 0;
-  if (IsEmptyHeader(header, iset_) &&
-      IsEmptyBody(body, iset_) &&
+  if (IsEmptyHeader(header) &&
+      IsEmptyBody(body) &&
       IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
       TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
     body->DisconnectAndDelete();
@@ -333,6 +256,68 @@
   }
 }
 
+bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+  ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
+  if (set != nullptr) {
+    for (HInstruction* i : *set) {
+      // Check that, other than phi, instruction are removable with uses contained in the cycle.
+      // TODO: investigate what cases are no longer in the graph.
+      if (i != phi) {
+        if (!i->IsInBlock() || !i->IsRemovable()) {
+          return false;
+        }
+        for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
+          if (set->find(use.GetUser()) == set->end()) {
+            return false;
+          }
+        }
+      }
+    }
+    DCHECK(iset_->empty());
+    iset_->insert(set->begin(), set->end());  // copy
+    return true;
+  }
+  return false;
+}
+
+// Find: phi: Phi(init, addsub)
+//       s:   SuspendCheck
+//       c:   Condition(phi, bound)
+//       i:   If(c)
+// TODO: Find a less pattern matching approach?
+bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+  DCHECK(iset_->empty());
+  HInstruction* phi = block->GetFirstPhi();
+  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+    HInstruction* s = block->GetFirstInstruction();
+    if (s != nullptr && s->IsSuspendCheck()) {
+      HInstruction* c = s->GetNext();
+      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+        HInstruction* i = c->GetNext();
+        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
+          iset_->insert(c);
+          iset_->insert(s);
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
+  if (block->GetFirstPhi() == nullptr) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
 bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                                             HInstruction* instruction,
                                             /*out*/ int32_t* use_count) {
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4113357..e18d175 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -64,6 +64,10 @@
   void SimplifyBlocks(LoopNode* node);
   void RemoveIfEmptyInnerLoop(LoopNode* node);
 
+  bool IsPhiInduction(HPhi* phi);
+  bool IsEmptyHeader(HBasicBlock* block);
+  bool IsEmptyBody(HBasicBlock* block);
+
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
                            /*out*/ int32_t* use_count);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6f4f3c9..257ccea 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1931,7 +1931,7 @@
     return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
   }
 
-  bool IsDeadAndRemovable() const {
+  bool IsRemovable() const {
     return
         !HasSideEffects() &&
         !CanThrow() &&
@@ -1939,11 +1939,14 @@
         !IsControlFlow() &&
         !IsNativeDebugInfo() &&
         !IsParameterValue() &&
-        !HasUses() &&
         // If we added an explicit barrier then we should keep it.
         !IsMemoryBarrier();
   }
 
+  bool IsDeadAndRemovable() const {
+    return IsRemovable() && !HasUses();
+  }
+
   // Does this instruction strictly dominate `other_instruction`?
   // Returns false if this instruction and `other_instruction` are the same.
   // Aborts if this instruction and `other_instruction` are both phis.