Unrolling and dynamic loop peeling framework in vectorizer.

Rationale:
This CL introduces the basic framework for dynamically peeling
(to obtain aligned access) and unrolling the vector loop (to reduce
looping overhead and allow more target specific optimizations
on e.g. SIMD loads and stores).

NOTE:
The current heuristics are "bogus" and merely meant to exercise
the new framework. This CL focuses on introducing correct code for
the vectorizer. Heuristics and the memory computations for alignment
are to be implemented later.

Test: test-art-target, test-art-host

Change-Id: I010af1475f42f92fd1daa6a967d7a85922beace8
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index cc6343a..de4bd85 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -116,14 +116,15 @@
   void OptimizeInnerLoop(LoopNode* node);
 
   // Vectorization analysis and synthesis.
-  bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
+  bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
   void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
   void GenerateNewLoop(LoopNode* node,
                        HBasicBlock* block,
                        HBasicBlock* new_preheader,
                        HInstruction* lo,
                        HInstruction* hi,
-                       HInstruction* step);
+                       HInstruction* step,
+                       uint32_t unroll);
   bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
   bool VectorizeUse(LoopNode* node,
                     HInstruction* instruction,
@@ -133,10 +134,11 @@
   bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
   bool TrySetVectorLength(uint32_t length);
   void GenerateVecInv(HInstruction* org, Primitive::Type type);
-  void GenerateVecSub(HInstruction* org, HInstruction* off);
+  void GenerateVecSub(HInstruction* org, HInstruction* offset);
   void GenerateVecMem(HInstruction* org,
                       HInstruction* opa,
                       HInstruction* opb,
+                      HInstruction* offset,
                       Primitive::Type type);
   void GenerateVecOp(HInstruction* org,
                      HInstruction* opa,
@@ -151,6 +153,11 @@
                                 Primitive::Type type,
                                 uint64_t restrictions);
 
+  // Vectorization heuristics.
+  bool IsVectorizationProfitable(int64_t trip_count);
+  void SetPeelingCandidate(int64_t trip_count);
+  uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
+
   // Helpers.
   bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
   bool TrySetSimpleLoopHeader(HBasicBlock* block);
@@ -208,20 +215,25 @@
   // Contents reside in phase-local heap memory.
   ArenaSet<ArrayReference>* vector_refs_;
 
+  // Dynamic loop peeling candidate for alignment.
+  const ArrayReference* vector_peeling_candidate_;
+
+  // Dynamic data dependence test of the form a != b.
+  HInstruction* vector_runtime_test_a_;
+  HInstruction* vector_runtime_test_b_;
+
   // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
-  // loop (simd_ is false) and the actual vector loop (simd_ is true). The data
+  // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data
   // structure maps original instructions into the new instructions.
   // Contents reside in phase-local heap memory.
   ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
 
   // Temporary vectorization bookkeeping.
+  VectorMode vector_mode_;  // synthesis mode
   HBasicBlock* vector_preheader_;  // preheader of the new loop
   HBasicBlock* vector_header_;  // header of the new loop
   HBasicBlock* vector_body_;  // body of the new loop
-  HInstruction* vector_runtime_test_a_;
-  HInstruction* vector_runtime_test_b_;  // defines a != b runtime test
-  HPhi* vector_phi_;  // the Phi representing the normalized loop index
-  VectorMode vector_mode_;  // selects synthesis mode
+  HInstruction* vector_index_;  // normalized index of the new loop
 
   friend class LoopOptimizationTest;