Compiler changes for bitstring based type checks.

We guard the use of this feature with a compile-time flag,
set to true in this CL.

Boot image size for aosp_taimen-userdebug in AOSP master:
  - before:
    arm boot*.oat: 63604740
    arm64 boot*.oat: 74237864
  - after:
    arm boot*.oat: 63531172 (-72KiB, -0.1%)
    arm64 boot*.oat: 74135008 (-100KiB, -0.1%)

The new TypeCheckBenchmark yields the following changes
using the little cores of taimen fixed at 1.4016GHz:
                               32-bit        64-bit
  timeCheckCastLevel1ToLevel1  11.48->15.80 11.47->15.78
  timeCheckCastLevel2ToLevel1  15.08->15.79 15.08->15.79
  timeCheckCastLevel3ToLevel1  19.01->15.82 17.94->15.81
  timeCheckCastLevel9ToLevel1  42.55->15.79 42.63->15.81
  timeCheckCastLevel9ToLevel2  39.70->14.36 39.70->14.35
  timeInstanceOfLevel1ToLevel1 13.74->17.93 13.76->17.95
  timeInstanceOfLevel2ToLevel1 17.02->17.95 16.99->17.93
  timeInstanceOfLevel3ToLevel1 24.03->17.95 24.45->17.95
  timeInstanceOfLevel9ToLevel1 47.13->17.95 47.14->18.00
  timeInstanceOfLevel9ToLevel2 44.19->16.52 44.27->16.51
This suggests that the bitstring typecheck should not be
used for exact type checks which would be equivalent to the
"Level1ToLevel1" benchmark. Whether the implementation is
a beneficial replacement for the kClassHierarchyCheck and
kAbstractClassCheck on average depends on how many levels
from the target class (or Object for a negative result) is
a typical object's class.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing --jit
Test: testrunner.py --host -t 670-bitstring-type-check
Test: Pixel 2 XL boots.
Test: testrunner.py --target --optimizing --jit
Test: testrunner.py --target -t 670-bitstring-type-check
Bug: 64692057
Bug: 71853552
Bug: 26687569
Change-Id: I538d7e036b5a8ae2cc3fe77662a5903d74854562
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cbe9e0a..e85f900 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6234,6 +6234,27 @@
   // No need for memory fence, thanks to the X86 memory model.
 }
 
+void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+                                                                    Register temp) {
+  uint32_t path_to_root = check->GetBitstringPathToRoot();
+  uint32_t mask = check->GetBitstringMask();
+  DCHECK(IsPowerOfTwo(mask + 1));
+  size_t mask_bits = WhichPowerOf2(mask + 1);
+
+  if ((false) && mask_bits == 16u) {
+    // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+    // Compare the bitstring in memory.
+    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+  } else {
+    // /* uint32_t */ temp = temp->status_
+    __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+    // Compare the bitstring bits using SUB.
+    __ subl(temp, Immediate(path_to_root));
+    // Shift out bits that do not contribute to the comparison.
+    __ shll(temp, Immediate(32u - mask_bits));
+  }
+}
+
 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   switch (desired_string_load_kind) {
@@ -6426,6 +6447,8 @@
     case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
+    case TypeCheckKind::kBitstringCheck:
+      break;
   }
 
   LocationSummary* locations =
@@ -6434,7 +6457,13 @@
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
   }
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for some cases.
@@ -6655,6 +6684,21 @@
       }
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ j(kNotEqual, &zero);
+      __ movl(out, Immediate(1));
+      __ jmp(&done);
+      break;
+    }
   }
 
   if (zero.IsLinked()) {
@@ -6681,6 +6725,10 @@
     // Require a register for the interface check since there is a loop that compares the class to
     // a memory address.
     locations->SetInAt(1, Location::RequiresRegister());
+  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
   } else {
     locations->SetInAt(1, Location::Any());
   }
@@ -6900,6 +6948,19 @@
       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
       break;
     }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, temp);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      break;
+    }
   }
   __ Bind(&done);