Min/max SIMDization support.

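Rationale:
The more vectorized, the better! Loops that use the Math min/max intrinsics
(kMathMin*/kMathMax*) can now be vectorized into HVecMin/HVecMax, with code
generation for ARM64, x86, and x86-64. Long min/max is not vectorized
(kNoMinMax), nor is float/double min/max where -0.0 vs. +0.0 would be
handled sloppily.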

Test: test-art-target, test-art-host

Change-Id: I758becca5beaa5b97fab2ab70f2e00cb53458703
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 57f7e6b..0739c6e 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -468,7 +468,50 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister lhs = VRegisterFrom(locations->InAt(0));
+  VRegister rhs = VRegisterFrom(locations->InAt(1));
+  VRegister dst = VRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+      } else {
+        __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+      } else {
+        __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+      } else {
+        __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
+      }
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
@@ -476,7 +519,50 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister lhs = VRegisterFrom(locations->InAt(0));
+  VRegister rhs = VRegisterFrom(locations->InAt(1));
+  VRegister dst = VRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+      } else {
+        __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+      } else {
+        __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+      } else {
+        __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
+      }
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 5bb19c1..14782d7 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -483,7 +483,51 @@
 }
 
 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminub(dst, src);
+      } else {
+        __ pminsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminuw(dst, src);
+      } else {
+        __ pminsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminud(dst, src);
+      } else {
+        __ pminsd(dst, src);
+      }
+      break;
+    // The float and double cases below are sloppy with respect to 0.0 vs. -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
@@ -491,7 +535,51 @@
 }
 
 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxub(dst, src);
+      } else {
+        __ pmaxsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxuw(dst, src);
+      } else {
+        __ pmaxsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxud(dst, src);
+      } else {
+        __ pmaxsd(dst, src);
+      }
+      break;
+    // The float and double cases below are sloppy with respect to 0.0 vs. -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 6d4aae8..246044e 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -353,6 +353,10 @@
   DCHECK(locations->InAt(0).Equals(locations->Out()));
   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+  DCHECK(instruction->IsRounded());
+  DCHECK(instruction->IsUnsigned());
+
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimByte:
       DCHECK_EQ(16u, instruction->GetVectorLength());
@@ -472,7 +476,51 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminub(dst, src);
+      } else {
+        __ pminsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminuw(dst, src);
+      } else {
+        __ pminsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pminud(dst, src);
+      } else {
+        __ pminsd(dst, src);
+      }
+      break;
+    // The float and double cases below are sloppy with respect to 0.0 vs. -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ minpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
@@ -480,7 +528,51 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxub(dst, src);
+      } else {
+        __ pmaxsb(dst, src);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxuw(dst, src);
+      } else {
+        __ pmaxsw(dst, src);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ pmaxud(dst, src);
+      } else {
+        __ pmaxsd(dst, src);
+      }
+      break;
+    // The float and double cases below are sloppy with respect to 0.0 vs. -0.0.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ maxpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e5d94c3..02816cf 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -514,6 +514,14 @@
     StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
   }
 
+  void VisitVecMin(HVecMin* min) OVERRIDE {
+    StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
+  }
+
+  void VisitVecMax(HVecMax* max) OVERRIDE {
+    StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
+  }
+
   void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 881802d..4067aa3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -869,6 +869,32 @@
         }
         return false;
       }
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+      case Intrinsics::kMathMinFloatFloat:
+      case Intrinsics::kMathMinDoubleDouble:
+      case Intrinsics::kMathMaxIntInt:
+      case Intrinsics::kMathMaxLongLong:
+      case Intrinsics::kMathMaxFloatFloat:
+      case Intrinsics::kMathMaxDoubleDouble: {
+        // Deal with vector restrictions.
+        if (HasVectorRestrictions(restrictions, kNoMinMax) ||
+            HasVectorRestrictions(restrictions, kNoHiBits)) {
+          // TODO: some kNoHiBits cases could still be vectorized; reject all for now.
+          return false;
+        }
+        // Accept MIN/MAX(x, y) for vectorizable operands.
+        HInstruction* opa = instruction->InputAt(0);
+        HInstruction* opb = instruction->InputAt(1);
+        if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+            VectorizeUse(node, opb, generate_code, type, restrictions)) {
+          if (generate_code) {
+            GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+          }
+          return true;
+        }
+        return false;
+      }
       default:
         return false;
     }  // switch
@@ -898,7 +924,7 @@
           *restrictions |= kNoDiv;
           return TrySetVectorLength(4);
         case Primitive::kPrimLong:
-          *restrictions |= kNoDiv | kNoMul;
+          *restrictions |= kNoDiv | kNoMul | kNoMinMax;
           return TrySetVectorLength(2);
         case Primitive::kPrimFloat:
           return TrySetVectorLength(4);
@@ -924,11 +950,13 @@
             *restrictions |= kNoDiv;
             return TrySetVectorLength(4);
           case Primitive::kPrimLong:
-            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
             return TrySetVectorLength(2);
           case Primitive::kPrimFloat:
+            *restrictions |= kNoMinMax;  // -0.0 vs +0.0
             return TrySetVectorLength(4);
           case Primitive::kPrimDouble:
+            *restrictions |= kNoMinMax;  // -0.0 vs +0.0
             return TrySetVectorLength(2);
           default:
             break;
@@ -1108,6 +1136,24 @@
             DCHECK(opb == nullptr);
             vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
             break;
+          case Intrinsics::kMathMinIntInt:
+          case Intrinsics::kMathMinLongLong:
+          case Intrinsics::kMathMinFloatFloat:
+          case Intrinsics::kMathMinDoubleDouble: {
+            bool is_unsigned = false;  // TODO: detect unsigned versions
+            vector = new (global_allocator_)
+                HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+            break;
+          }
+          case Intrinsics::kMathMaxIntInt:
+          case Intrinsics::kMathMaxLongLong:
+          case Intrinsics::kMathMaxFloatFloat:
+          case Intrinsics::kMathMaxDoubleDouble: {
+            bool is_unsigned = false;  // TODO: detect unsigned versions
+            vector = new (global_allocator_)
+                HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+            break;
+          }
           default:
             LOG(FATAL) << "Unsupported SIMD intrinsic";
             UNREACHABLE();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4a7da86..6d5978d 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -71,6 +71,7 @@
     kNoSignedHAdd    = 32,   // no signed halving add
     kNoUnroundedHAdd = 64,   // no unrounded halving add
     kNoAbs           = 128,  // no absolute value
+    kNoMinMax        = 256,  // no min/max
   };
 
   /*
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b..c2bb6e7 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -451,13 +451,24 @@
           HInstruction* right,
           Primitive::Type packed_type,
           size_t vector_length,
+          bool is_unsigned,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
+    SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
   }
+
+  bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); }
+
   DECLARE_INSTRUCTION(VecMin);
+
  private:
+  // Additional packed bits.
+  static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+  static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1;
+  static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HVecMin);
 };
 
@@ -470,13 +481,24 @@
           HInstruction* right,
           Primitive::Type packed_type,
           size_t vector_length,
+          bool is_unsigned,
           uint32_t dex_pc = kNoDexPc)
       : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(HasConsistentPackedTypes(left, packed_type));
     DCHECK(HasConsistentPackedTypes(right, packed_type));
+    SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
   }
+
+  bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); }
+
   DECLARE_INSTRUCTION(VecMax);
+
  private:
+  // Additional packed bits.
+  static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+  static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1;
+  static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HVecMax);
 };
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 1736618..bef32f8 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1238,6 +1238,139 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
+void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x38);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3C);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xEA);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xEE);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x39);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDA);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDE);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3A);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3E);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3B);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3F);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index a747cda..c4bb9ee 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -498,6 +498,25 @@
   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pavgw(XmmRegister dst, XmmRegister src);
 
+  void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxsb(XmmRegister dst, XmmRegister src);
+  void pminsw(XmmRegister dst, XmmRegister src);
+  void pmaxsw(XmmRegister dst, XmmRegister src);
+  void pminsd(XmmRegister dst, XmmRegister src);
+  void pmaxsd(XmmRegister dst, XmmRegister src);
+
+  void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxub(XmmRegister dst, XmmRegister src);
+  void pminuw(XmmRegister dst, XmmRegister src);
+  void pmaxuw(XmmRegister dst, XmmRegister src);
+  void pminud(XmmRegister dst, XmmRegister src);
+  void pmaxud(XmmRegister dst, XmmRegister src);
+
+  void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void maxps(XmmRegister dst, XmmRegister src);
+  void minpd(XmmRegister dst, XmmRegister src);
+  void maxpd(XmmRegister dst, XmmRegister src);
+
   void pcmpeqb(XmmRegister dst, XmmRegister src);
   void pcmpeqw(XmmRegister dst, XmmRegister src);
   void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index f75f972..34f2a47 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -613,6 +613,70 @@
   DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
 }
 
+TEST_F(AssemblerX86Test, PMinSB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86Test, PMaxSB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86Test, PMinSW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86Test, PMaxSW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86Test, PMinSD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86Test, PMaxSD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86Test, PMinUB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86Test, PMaxUB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86Test, PMinUW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86Test, PMaxUW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86Test, PMinUD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86Test, PMaxUD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86Test, MinPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86Test, MaxPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86Test, MinPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86Test, MaxPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
 TEST_F(AssemblerX86Test, PCmpeqB) {
   DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1b7a485..82d1174 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1445,6 +1445,156 @@
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x38);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3C);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xEA);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xEE);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x39);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDA);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDE);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3A);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3E);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3B);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x3F);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5D);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5F);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0ddc46c..6e584fe 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -526,6 +526,25 @@
   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pavgw(XmmRegister dst, XmmRegister src);
 
+  void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxsb(XmmRegister dst, XmmRegister src);
+  void pminsw(XmmRegister dst, XmmRegister src);
+  void pmaxsw(XmmRegister dst, XmmRegister src);
+  void pminsd(XmmRegister dst, XmmRegister src);
+  void pmaxsd(XmmRegister dst, XmmRegister src);
+
+  void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pmaxub(XmmRegister dst, XmmRegister src);
+  void pminuw(XmmRegister dst, XmmRegister src);
+  void pmaxuw(XmmRegister dst, XmmRegister src);
+  void pminud(XmmRegister dst, XmmRegister src);
+  void pmaxud(XmmRegister dst, XmmRegister src);
+
+  void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void maxps(XmmRegister dst, XmmRegister src);
+  void minpd(XmmRegister dst, XmmRegister src);
+  void maxpd(XmmRegister dst, XmmRegister src);
+
   void pcmpeqb(XmmRegister dst, XmmRegister src);
   void pcmpeqw(XmmRegister dst, XmmRegister src);
   void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index e7d8401..b574003 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1301,6 +1301,70 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
 }
 
+TEST_F(AssemblerX86_64Test, Pminsb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pminub) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxub) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86_64Test, Pminuw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxuw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminud) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxud) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86_64Test, Minps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86_64Test, Maxps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86_64Test, Minpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86_64Test, Maxpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
 TEST_F(AssemblerX86_64Test, PCmpeqb) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
 }
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index e12bcec..4824f70 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -581,13 +581,69 @@
               load = true;
               src_reg_file = dst_reg_file = SSE;
               break;
-            case 0x39:
+            case 0x37:
               opcode1 = "pcmpgtq";
               prefix[2] = 0;
               has_modrm = true;
               load = true;
               src_reg_file = dst_reg_file = SSE;
               break;
+            case 0x38:
+              opcode1 = "pminsb";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x39:
+              opcode1 = "pminsd";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3A:
+              opcode1 = "pminuw";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3B:
+              opcode1 = "pminud";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3C:
+              opcode1 = "pmaxsb";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3D:
+              opcode1 = "pmaxsd";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3E:
+              opcode1 = "pmaxuw";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x3F:
+              opcode1 = "pmaxud";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
             case 0x40:
               opcode1 = "pmulld";
               prefix[2] = 0;
@@ -1133,8 +1189,12 @@
           opcode1 = opcode_tmp.c_str();
         }
         break;
+      case 0xDA:
+      case 0xDE:
       case 0xE0:
       case 0xE3:
+      case 0xEA:
+      case 0xEE:
         if (prefix[2] == 0x66) {
           src_reg_file = dst_reg_file = SSE;
           prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
@@ -1142,8 +1202,12 @@
           src_reg_file = dst_reg_file = MMX;
         }
         switch (*instr) {
+          case 0xDA: opcode1 = "pminub"; break;
+          case 0xDE: opcode1 = "pmaxub"; break;
           case 0xE0: opcode1 = "pavgb"; break;
           case 0xE3: opcode1 = "pavgw"; break;
+          case 0xEA: opcode1 = "pminsw"; break;
+          case 0xEE: opcode1 = "pmaxsw"; break;
         }
         prefix[2] = 0;
         has_modrm = true;
diff --git a/test/651-checker-byte-simd-minmax/expected.txt b/test/651-checker-byte-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-byte-simd-minmax/info.txt b/test/651-checker-byte-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
new file mode 100644
index 0000000..8211ace
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Fill y and z with every possible pair of byte values; x starts out zeroed.
+    int total = 256 * 256;
+    byte[] x = new byte[total];
+    byte[] y = new byte[total];
+    byte[] z = new byte[total];
+    int k = 0;
+    for (int i = 0; i < 256; i++) {
+      for (int j = 0; j < 256; j++) {
+        x[k] = 0;
+        y[k] = (byte) i;
+        z[k] = (byte) j;
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/651-checker-char-simd-minmax/expected.txt b/test/651-checker-char-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-char-simd-minmax/info.txt b/test/651-checker-char-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
new file mode 100644
index 0000000..5ce7b94
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(char[] x, char[] y, char[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (char) Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(char[] x, char[] y, char[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (char) Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    char[] interesting = {
+      0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff,
+      0x0100, 0x0101, 0x017f, 0x0180, 0x0181, 0x01ff,
+      0x7f00, 0x7f01, 0x7f7f, 0x7f80, 0x7f81, 0x7fff,
+      0x8000, 0x8001, 0x807f, 0x8080, 0x8081, 0x80ff,
+      0x8100, 0x8101, 0x817f, 0x8180, 0x8181, 0x81ff,
+      0xff00, 0xff01, 0xff7f, 0xff80, 0xff81, 0xffff
+    };
+    // Fill y and z with every ordered pair of the interesting values; x starts out zeroed.
+    int total = interesting.length * interesting.length;
+    char[] x = new char[total];
+    char[] y = new char[total];
+    char[] z = new char[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      char expected = (char) Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      char expected = (char) Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/651-checker-double-simd-minmax/expected.txt b/test/651-checker-double-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-double-simd-minmax/info.txt b/test/651-checker-double-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
new file mode 100644
index 0000000..e1711ae
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(double[], double[], double[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinDoubleDouble loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMin(double[], double[], double[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMin(double[] x, double[] y, double[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(double[], double[], double[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMax(double[] x, double[] y, double[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    double[] interesting = {  // double literals, so nothing is rounded to float
+      -0.0,
+      +0.0,
+      -1.0,
+      +1.0,
+      -3.14,
+      +3.14,
+      -100.0,
+      +100.0,
+      -4444.44,
+      +4444.44,
+      Double.MIN_NORMAL,
+      Double.MIN_VALUE,
+      Double.MAX_VALUE,
+      Double.NEGATIVE_INFINITY,
+      Double.POSITIVE_INFINITY,
+      Double.NaN
+    };
+    // Fill y and z with every ordered pair of the interesting values; x starts out zeroed.
+    int total = interesting.length * interesting.length;
+    double[] x = new double[total];
+    double[] y = new double[total];
+    double[] z = new double[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // Run each kernel and compare element-wise against scalar Math.min/Math.max.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      double expected = Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      double expected = Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(double expected, double result) {
+    // Compares raw bits: doubleToLongBits() tells +0.0 from -0.0 and maps
+    // every NaN to a single canonical bit pattern.
+    long expected_bits = Double.doubleToLongBits(expected);
+    long result_bits = Double.doubleToLongBits(result);
+    if (expected_bits != result_bits) {
+      throw new Error("Expected: " + expected +
+          "(0x" + Long.toHexString(expected_bits) + "), found: " + result +
+          "(0x" + Long.toHexString(result_bits) + ")");
+    }
+  }
+}
diff --git a/test/651-checker-float-simd-minmax/expected.txt b/test/651-checker-float-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-float-simd-minmax/info.txt b/test/651-checker-float-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java
new file mode 100644
index 0000000..bd412e0
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(float[], float[], float[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:f\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinFloatFloat loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMin(float[], float[], float[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMin(float[] x, float[] y, float[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(float[], float[], float[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:f\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:f\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxFloatFloat loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  // TODO x86: 0.0 vs -0.0?
+  //
+  /// CHECK-START-ARM64: void Main.doitMax(float[], float[], float[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMax(float[] x, float[] y, float[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    float[] interesting = {
+      -0.0f,
+      +0.0f,
+      -1.0f,
+      +1.0f,
+      -3.14f,
+      +3.14f,
+      -100.0f,
+      +100.0f,
+      -4444.44f,
+      +4444.44f,
+      Float.MIN_NORMAL,
+      Float.MIN_VALUE,
+      Float.MAX_VALUE,
+      Float.NEGATIVE_INFINITY,
+      Float.POSITIVE_INFINITY,
+      Float.NaN
+    };
+    // Fill y and z with every ordered pair of the interesting values; x starts out zeroed.
+    int total = interesting.length * interesting.length;
+    float[] x = new float[total];
+    float[] y = new float[total];
+    float[] z = new float[total];
+    int k = 0;
+    for (int i = 0; i < interesting.length; i++) {
+      for (int j = 0; j < interesting.length; j++) {
+        x[k] = 0;
+        y[k] = interesting[i];
+        z[k] = interesting[j];
+        k++;
+      }
+    }
+
+    // And test.
+    doitMin(x, y, z);
+    for (int i = 0; i < total; i++) {
+      float expected = Math.min(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+    doitMax(x, y, z);
+    for (int i = 0; i < total; i++) {
+      float expected = Math.max(y[i], z[i]);
+      expectEquals(expected, x[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(float expected, float result) {
+    // Compares raw bits: floatToIntBits() tells +0.0 from -0.0 and maps
+    // every NaN to a single canonical bit pattern.
+    int expected_bits = Float.floatToIntBits(expected);
+    int result_bits = Float.floatToIntBits(result);
+    if (expected_bits != result_bits) {
+      throw new Error("Expected: " + expected +
+          "(0x" + Integer.toHexString(expected_bits) + "), found: " + result +
+          "(0x" + Integer.toHexString(result_bits) + ")");
+    }
+  }
+}
diff --git a/test/651-checker-int-simd-minmax/expected.txt b/test/651-checker-int-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-int-simd-minmax/info.txt b/test/651-checker-int-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
new file mode 100644
index 0000000..4e05a9d
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(int[], int[], int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  // Before: scalar loop around the int min intrinsic. After (ARM64): VecLoad/VecMin/VecStore.
+  /// CHECK-START-ARM64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMin(int[] x, int[] y, int[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array bounds the loop.
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);  // Element-wise minimum of y and z, stored into x.
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(int[], int[], int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:i\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  // Before: scalar loop around the int max intrinsic. After (ARM64): VecLoad/VecMax/VecStore.
+  /// CHECK-START-ARM64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMax(int[] x, int[] y, int[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array bounds the loop.
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);  // Element-wise maximum of y and z, stored into x.
+    }
+  }
+
+  public static void main(String[] args) {
+    final int[] samples = {
+      0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff,
+      0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff,
+      0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff,
+      0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff,
+      0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff,
+      0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff
+    };
+    // Pair every sample with every sample: lhs/rhs hold the inputs, out the results.
+    final int side = samples.length;
+    final int pairs = side * side;
+    int[] out = new int[pairs];
+    int[] lhs = new int[pairs];
+    int[] rhs = new int[pairs];
+    int next = 0;
+    for (int first : samples) {
+      for (int second : samples) {
+        out[next] = 0;
+        lhs[next] = first;
+        rhs[next] = second;
+        next++;
+      }
+    }
+
+    // The loop under test must agree with the scalar Math.min for every pair.
+    doitMin(out, lhs, rhs);
+    for (int idx = 0; idx < pairs; idx++) {
+      expectEquals(Math.min(lhs[idx], rhs[idx]), out[idx]);
+    }
+    // Same comparison for Math.max, reusing the inputs and the output array.
+    doitMax(out, lhs, rhs);
+    for (int idx = 0; idx < pairs; idx++) {
+      expectEquals(Math.max(lhs[idx], rhs[idx]), out[idx]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    // Matching values pass silently; any difference aborts the test run.
+    if (expected == result) { return; }
+    throw new Error("Expected: " + expected + ", found: " + result);
+  }
+}
diff --git a/test/651-checker-long-simd-minmax/expected.txt b/test/651-checker-long-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-long-simd-minmax/info.txt b/test/651-checker-long-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java
new file mode 100644
index 0000000..51cf67e
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:j\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinLongLong loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  // Before: scalar loop around the long min intrinsic.
+  // Not directly supported for longs.
+  // Hence no VecMin may appear after loop_optimization.
+  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(long[] x, long[] y, long[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array bounds the loop.
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.min(y[i], z[i]);  // Element-wise minimum of y and z, stored into x.
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:j\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:j\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxLongLong loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  // Before: scalar loop around the long max intrinsic.
+  // Not directly supported for longs.
+  // Hence no VecMax may appear after loop_optimization.
+  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(long[] x, long[] y, long[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array bounds the loop.
+    for (int i = 0; i < min; i++) {
+      x[i] = Math.max(y[i], z[i]);  // Element-wise maximum of y and z, stored into x.
+    }
+  }
+
+  public static void main(String[] args) {
+    final long[] samples = {
+      0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL,
+      0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL,
+      0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL,
+      0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL,
+      0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL,
+      0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL,
+      0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL,
+      0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL,
+      0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL,
+      0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL,
+      0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL,
+      0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL
+    };
+    // Pair every sample with every sample: lhs/rhs hold the inputs, out the results.
+    final int side = samples.length;
+    final int pairs = side * side;
+    long[] out = new long[pairs];
+    long[] lhs = new long[pairs];
+    long[] rhs = new long[pairs];
+    int next = 0;
+    for (long first : samples) {
+      for (long second : samples) {
+        out[next] = 0;
+        lhs[next] = first;
+        rhs[next] = second;
+        next++;
+      }
+    }
+
+    // The loop under test must agree with the scalar Math.min for every pair.
+    doitMin(out, lhs, rhs);
+    for (int idx = 0; idx < pairs; idx++) {
+      expectEquals(Math.min(lhs[idx], rhs[idx]), out[idx]);
+    }
+    // Same comparison for Math.max, reusing the inputs and the output array.
+    doitMax(out, lhs, rhs);
+    for (int idx = 0; idx < pairs; idx++) {
+      expectEquals(Math.max(lhs[idx], rhs[idx]), out[idx]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(long expected, long result) {
+    // Matching values pass silently; any difference aborts the test run.
+    if (expected == result) { return; }
+    throw new Error("Expected: " + expected + ", found: " + result);
+  }
+}
diff --git a/test/651-checker-short-simd-minmax/expected.txt b/test/651-checker-short-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-short-simd-minmax/info.txt b/test/651-checker-short-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
new file mode 100644
index 0000000..f34f526
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  // The int-typed min plus narrowing conversion is not vectorized: no VecMin afterwards.
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecMin
+  private static void doitMin(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array bounds the loop.
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.min(y[i], z[i]);  // Min is computed on ints; the cast narrows it back.
+    }
+  }
+
+  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  // The int-typed max plus narrowing conversion is not vectorized: no VecMax afterwards.
+  // TODO: narrow type vectorization.
+  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecMax
+  private static void doitMax(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array bounds the loop.
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.max(y[i], z[i]);  // Max is computed on ints; the cast narrows it back.
+    }
+  }
+
+  public static void main(String[] args) {
+    final short[] samples = {
+      (short) 0x0000, (short) 0x0001, (short) 0x007f,
+      (short) 0x0080, (short) 0x0081, (short) 0x00ff,
+      (short) 0x0100, (short) 0x0101, (short) 0x017f,
+      (short) 0x0180, (short) 0x0181, (short) 0x01ff,
+      (short) 0x7f00, (short) 0x7f01, (short) 0x7f7f,
+      (short) 0x7f80, (short) 0x7f81, (short) 0x7fff,
+      (short) 0x8000, (short) 0x8001, (short) 0x807f,
+      (short) 0x8080, (short) 0x8081, (short) 0x80ff,
+      (short) 0x8100, (short) 0x8101, (short) 0x817f,
+      (short) 0x8180, (short) 0x8181, (short) 0x81ff,
+      (short) 0xff00, (short) 0xff01, (short) 0xff7f,
+      (short) 0xff80, (short) 0xff81, (short) 0xffff
+    };
+    // Pair every sample with every sample: lhs/rhs hold the inputs, out the results.
+    final int side = samples.length;
+    final int pairs = side * side;
+    short[] out = new short[pairs];
+    short[] lhs = new short[pairs];
+    short[] rhs = new short[pairs];
+    int next = 0;
+    for (short first : samples) {
+      for (short second : samples) {
+        out[next] = 0;
+        lhs[next] = first;
+        rhs[next] = second;
+        next++;
+      }
+    }
+
+    // The loop under test must agree with the scalar Math.min for every pair.
+    doitMin(out, lhs, rhs);
+    for (int idx = 0; idx < pairs; idx++) {
+      expectEquals((short) Math.min(lhs[idx], rhs[idx]), out[idx]);
+    }
+    // Same comparison for Math.max, reusing the inputs and the output array.
+    doitMax(out, lhs, rhs);
+    for (int idx = 0; idx < pairs; idx++) {
+      expectEquals((short) Math.max(lhs[idx], rhs[idx]), out[idx]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(short expected, short result) {
+    // Matching values pass silently; any difference aborts the test run.
+    if (expected == result) { return; }
+    throw new Error("Expected: " + expected + ", found: " + result);
+  }
+}