Fix vectorization of shifts whose distance exceeds the SIMD lane width.

Rationale:
ARM is less forgiving about shifting by more than
the lane width of the SIMD instruction. Rejecting
such cases is no loss, since such a shift yields 0
anyway and should be optimized differently.

Bug: 37776122
Test: test-art-target, test-art-host
Change-Id: I22d04afbfce82b4593f17c2f48c1fd5a0805d305
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index c783dde..bbc55dd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -833,17 +833,22 @@
     // TODO: accept symbolic, albeit loop invariant shift factors.
     HInstruction* opa = instruction->InputAt(0);
     HInstruction* opb = instruction->InputAt(1);
-    if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) {
-      if (generate_code) {
-        // Make sure shift factor only looks at lower bits, as defined for sequential shifts.
-        // Note that even the narrower SIMD shifts do the right thing after that.
-        int32_t mask = (instruction->GetType() == Primitive::kPrimLong)
-            ? kMaxLongShiftDistance
-            : kMaxIntShiftDistance;
-        HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask);
-        GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+    int64_t value = 0;
+    if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) {
+      // Make sure shift distance only looks at lower bits, as defined for sequential shifts.
+      int64_t mask = (instruction->GetType() == Primitive::kPrimLong)
+          ? kMaxLongShiftDistance
+          : kMaxIntShiftDistance;
+      int64_t distance = value & mask;
+      // Restrict shift distance to packed data type width.
+      int64_t max_distance = Primitive::ComponentSize(type) * 8;
+      if (0 <= distance && distance < max_distance) {
+        if (generate_code) {
+          HInstruction* s = graph_->GetIntConstant(distance);
+          GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+        }
+        return true;
       }
-      return true;
     }
   } else if (instruction->IsInvokeStaticOrDirect()) {
     // Accept particular intrinsics.