Merge "ART: Fix typo in IsCallerSave"
diff --git a/benchmark/const-class/info.txt b/benchmark/const-class/info.txt
new file mode 100644
index 0000000..ed0b827
--- /dev/null
+++ b/benchmark/const-class/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating const-class instructions in a loop.
diff --git a/benchmark/const-class/src/ConstClassBenchmark.java b/benchmark/const-class/src/ConstClassBenchmark.java
new file mode 100644
index 0000000..d45b49f
--- /dev/null
+++ b/benchmark/const-class/src/ConstClassBenchmark.java
@@ -0,0 +1,1071 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class ConstClassBenchmark {
+    // Define 1025 classes with consecutive type indexes in the dex file.
+    // The tests below rely on the knowledge that ART uses the low 10 bits
+    // of the type index as the hash into the DexCache types array.
+    // Note: n == n + 1024 (mod 2^10), n + 1 != n + 1023 (mod 2^10).
+    public static class TestClass_0000 {}
+    public static class TestClass_0001 {}
+    public static class TestClass_0002 {}
+    public static class TestClass_0003 {}
+    public static class TestClass_0004 {}
+    public static class TestClass_0005 {}
+    public static class TestClass_0006 {}
+    public static class TestClass_0007 {}
+    public static class TestClass_0008 {}
+    public static class TestClass_0009 {}
+    public static class TestClass_0010 {}
+    public static class TestClass_0011 {}
+    public static class TestClass_0012 {}
+    public static class TestClass_0013 {}
+    public static class TestClass_0014 {}
+    public static class TestClass_0015 {}
+    public static class TestClass_0016 {}
+    public static class TestClass_0017 {}
+    public static class TestClass_0018 {}
+    public static class TestClass_0019 {}
+    public static class TestClass_0020 {}
+    public static class TestClass_0021 {}
+    public static class TestClass_0022 {}
+    public static class TestClass_0023 {}
+    public static class TestClass_0024 {}
+    public static class TestClass_0025 {}
+    public static class TestClass_0026 {}
+    public static class TestClass_0027 {}
+    public static class TestClass_0028 {}
+    public static class TestClass_0029 {}
+    public static class TestClass_0030 {}
+    public static class TestClass_0031 {}
+    public static class TestClass_0032 {}
+    public static class TestClass_0033 {}
+    public static class TestClass_0034 {}
+    public static class TestClass_0035 {}
+    public static class TestClass_0036 {}
+    public static class TestClass_0037 {}
+    public static class TestClass_0038 {}
+    public static class TestClass_0039 {}
+    public static class TestClass_0040 {}
+    public static class TestClass_0041 {}
+    public static class TestClass_0042 {}
+    public static class TestClass_0043 {}
+    public static class TestClass_0044 {}
+    public static class TestClass_0045 {}
+    public static class TestClass_0046 {}
+    public static class TestClass_0047 {}
+    public static class TestClass_0048 {}
+    public static class TestClass_0049 {}
+    public static class TestClass_0050 {}
+    public static class TestClass_0051 {}
+    public static class TestClass_0052 {}
+    public static class TestClass_0053 {}
+    public static class TestClass_0054 {}
+    public static class TestClass_0055 {}
+    public static class TestClass_0056 {}
+    public static class TestClass_0057 {}
+    public static class TestClass_0058 {}
+    public static class TestClass_0059 {}
+    public static class TestClass_0060 {}
+    public static class TestClass_0061 {}
+    public static class TestClass_0062 {}
+    public static class TestClass_0063 {}
+    public static class TestClass_0064 {}
+    public static class TestClass_0065 {}
+    public static class TestClass_0066 {}
+    public static class TestClass_0067 {}
+    public static class TestClass_0068 {}
+    public static class TestClass_0069 {}
+    public static class TestClass_0070 {}
+    public static class TestClass_0071 {}
+    public static class TestClass_0072 {}
+    public static class TestClass_0073 {}
+    public static class TestClass_0074 {}
+    public static class TestClass_0075 {}
+    public static class TestClass_0076 {}
+    public static class TestClass_0077 {}
+    public static class TestClass_0078 {}
+    public static class TestClass_0079 {}
+    public static class TestClass_0080 {}
+    public static class TestClass_0081 {}
+    public static class TestClass_0082 {}
+    public static class TestClass_0083 {}
+    public static class TestClass_0084 {}
+    public static class TestClass_0085 {}
+    public static class TestClass_0086 {}
+    public static class TestClass_0087 {}
+    public static class TestClass_0088 {}
+    public static class TestClass_0089 {}
+    public static class TestClass_0090 {}
+    public static class TestClass_0091 {}
+    public static class TestClass_0092 {}
+    public static class TestClass_0093 {}
+    public static class TestClass_0094 {}
+    public static class TestClass_0095 {}
+    public static class TestClass_0096 {}
+    public static class TestClass_0097 {}
+    public static class TestClass_0098 {}
+    public static class TestClass_0099 {}
+    public static class TestClass_0100 {}
+    public static class TestClass_0101 {}
+    public static class TestClass_0102 {}
+    public static class TestClass_0103 {}
+    public static class TestClass_0104 {}
+    public static class TestClass_0105 {}
+    public static class TestClass_0106 {}
+    public static class TestClass_0107 {}
+    public static class TestClass_0108 {}
+    public static class TestClass_0109 {}
+    public static class TestClass_0110 {}
+    public static class TestClass_0111 {}
+    public static class TestClass_0112 {}
+    public static class TestClass_0113 {}
+    public static class TestClass_0114 {}
+    public static class TestClass_0115 {}
+    public static class TestClass_0116 {}
+    public static class TestClass_0117 {}
+    public static class TestClass_0118 {}
+    public static class TestClass_0119 {}
+    public static class TestClass_0120 {}
+    public static class TestClass_0121 {}
+    public static class TestClass_0122 {}
+    public static class TestClass_0123 {}
+    public static class TestClass_0124 {}
+    public static class TestClass_0125 {}
+    public static class TestClass_0126 {}
+    public static class TestClass_0127 {}
+    public static class TestClass_0128 {}
+    public static class TestClass_0129 {}
+    public static class TestClass_0130 {}
+    public static class TestClass_0131 {}
+    public static class TestClass_0132 {}
+    public static class TestClass_0133 {}
+    public static class TestClass_0134 {}
+    public static class TestClass_0135 {}
+    public static class TestClass_0136 {}
+    public static class TestClass_0137 {}
+    public static class TestClass_0138 {}
+    public static class TestClass_0139 {}
+    public static class TestClass_0140 {}
+    public static class TestClass_0141 {}
+    public static class TestClass_0142 {}
+    public static class TestClass_0143 {}
+    public static class TestClass_0144 {}
+    public static class TestClass_0145 {}
+    public static class TestClass_0146 {}
+    public static class TestClass_0147 {}
+    public static class TestClass_0148 {}
+    public static class TestClass_0149 {}
+    public static class TestClass_0150 {}
+    public static class TestClass_0151 {}
+    public static class TestClass_0152 {}
+    public static class TestClass_0153 {}
+    public static class TestClass_0154 {}
+    public static class TestClass_0155 {}
+    public static class TestClass_0156 {}
+    public static class TestClass_0157 {}
+    public static class TestClass_0158 {}
+    public static class TestClass_0159 {}
+    public static class TestClass_0160 {}
+    public static class TestClass_0161 {}
+    public static class TestClass_0162 {}
+    public static class TestClass_0163 {}
+    public static class TestClass_0164 {}
+    public static class TestClass_0165 {}
+    public static class TestClass_0166 {}
+    public static class TestClass_0167 {}
+    public static class TestClass_0168 {}
+    public static class TestClass_0169 {}
+    public static class TestClass_0170 {}
+    public static class TestClass_0171 {}
+    public static class TestClass_0172 {}
+    public static class TestClass_0173 {}
+    public static class TestClass_0174 {}
+    public static class TestClass_0175 {}
+    public static class TestClass_0176 {}
+    public static class TestClass_0177 {}
+    public static class TestClass_0178 {}
+    public static class TestClass_0179 {}
+    public static class TestClass_0180 {}
+    public static class TestClass_0181 {}
+    public static class TestClass_0182 {}
+    public static class TestClass_0183 {}
+    public static class TestClass_0184 {}
+    public static class TestClass_0185 {}
+    public static class TestClass_0186 {}
+    public static class TestClass_0187 {}
+    public static class TestClass_0188 {}
+    public static class TestClass_0189 {}
+    public static class TestClass_0190 {}
+    public static class TestClass_0191 {}
+    public static class TestClass_0192 {}
+    public static class TestClass_0193 {}
+    public static class TestClass_0194 {}
+    public static class TestClass_0195 {}
+    public static class TestClass_0196 {}
+    public static class TestClass_0197 {}
+    public static class TestClass_0198 {}
+    public static class TestClass_0199 {}
+    public static class TestClass_0200 {}
+    public static class TestClass_0201 {}
+    public static class TestClass_0202 {}
+    public static class TestClass_0203 {}
+    public static class TestClass_0204 {}
+    public static class TestClass_0205 {}
+    public static class TestClass_0206 {}
+    public static class TestClass_0207 {}
+    public static class TestClass_0208 {}
+    public static class TestClass_0209 {}
+    public static class TestClass_0210 {}
+    public static class TestClass_0211 {}
+    public static class TestClass_0212 {}
+    public static class TestClass_0213 {}
+    public static class TestClass_0214 {}
+    public static class TestClass_0215 {}
+    public static class TestClass_0216 {}
+    public static class TestClass_0217 {}
+    public static class TestClass_0218 {}
+    public static class TestClass_0219 {}
+    public static class TestClass_0220 {}
+    public static class TestClass_0221 {}
+    public static class TestClass_0222 {}
+    public static class TestClass_0223 {}
+    public static class TestClass_0224 {}
+    public static class TestClass_0225 {}
+    public static class TestClass_0226 {}
+    public static class TestClass_0227 {}
+    public static class TestClass_0228 {}
+    public static class TestClass_0229 {}
+    public static class TestClass_0230 {}
+    public static class TestClass_0231 {}
+    public static class TestClass_0232 {}
+    public static class TestClass_0233 {}
+    public static class TestClass_0234 {}
+    public static class TestClass_0235 {}
+    public static class TestClass_0236 {}
+    public static class TestClass_0237 {}
+    public static class TestClass_0238 {}
+    public static class TestClass_0239 {}
+    public static class TestClass_0240 {}
+    public static class TestClass_0241 {}
+    public static class TestClass_0242 {}
+    public static class TestClass_0243 {}
+    public static class TestClass_0244 {}
+    public static class TestClass_0245 {}
+    public static class TestClass_0246 {}
+    public static class TestClass_0247 {}
+    public static class TestClass_0248 {}
+    public static class TestClass_0249 {}
+    public static class TestClass_0250 {}
+    public static class TestClass_0251 {}
+    public static class TestClass_0252 {}
+    public static class TestClass_0253 {}
+    public static class TestClass_0254 {}
+    public static class TestClass_0255 {}
+    public static class TestClass_0256 {}
+    public static class TestClass_0257 {}
+    public static class TestClass_0258 {}
+    public static class TestClass_0259 {}
+    public static class TestClass_0260 {}
+    public static class TestClass_0261 {}
+    public static class TestClass_0262 {}
+    public static class TestClass_0263 {}
+    public static class TestClass_0264 {}
+    public static class TestClass_0265 {}
+    public static class TestClass_0266 {}
+    public static class TestClass_0267 {}
+    public static class TestClass_0268 {}
+    public static class TestClass_0269 {}
+    public static class TestClass_0270 {}
+    public static class TestClass_0271 {}
+    public static class TestClass_0272 {}
+    public static class TestClass_0273 {}
+    public static class TestClass_0274 {}
+    public static class TestClass_0275 {}
+    public static class TestClass_0276 {}
+    public static class TestClass_0277 {}
+    public static class TestClass_0278 {}
+    public static class TestClass_0279 {}
+    public static class TestClass_0280 {}
+    public static class TestClass_0281 {}
+    public static class TestClass_0282 {}
+    public static class TestClass_0283 {}
+    public static class TestClass_0284 {}
+    public static class TestClass_0285 {}
+    public static class TestClass_0286 {}
+    public static class TestClass_0287 {}
+    public static class TestClass_0288 {}
+    public static class TestClass_0289 {}
+    public static class TestClass_0290 {}
+    public static class TestClass_0291 {}
+    public static class TestClass_0292 {}
+    public static class TestClass_0293 {}
+    public static class TestClass_0294 {}
+    public static class TestClass_0295 {}
+    public static class TestClass_0296 {}
+    public static class TestClass_0297 {}
+    public static class TestClass_0298 {}
+    public static class TestClass_0299 {}
+    public static class TestClass_0300 {}
+    public static class TestClass_0301 {}
+    public static class TestClass_0302 {}
+    public static class TestClass_0303 {}
+    public static class TestClass_0304 {}
+    public static class TestClass_0305 {}
+    public static class TestClass_0306 {}
+    public static class TestClass_0307 {}
+    public static class TestClass_0308 {}
+    public static class TestClass_0309 {}
+    public static class TestClass_0310 {}
+    public static class TestClass_0311 {}
+    public static class TestClass_0312 {}
+    public static class TestClass_0313 {}
+    public static class TestClass_0314 {}
+    public static class TestClass_0315 {}
+    public static class TestClass_0316 {}
+    public static class TestClass_0317 {}
+    public static class TestClass_0318 {}
+    public static class TestClass_0319 {}
+    public static class TestClass_0320 {}
+    public static class TestClass_0321 {}
+    public static class TestClass_0322 {}
+    public static class TestClass_0323 {}
+    public static class TestClass_0324 {}
+    public static class TestClass_0325 {}
+    public static class TestClass_0326 {}
+    public static class TestClass_0327 {}
+    public static class TestClass_0328 {}
+    public static class TestClass_0329 {}
+    public static class TestClass_0330 {}
+    public static class TestClass_0331 {}
+    public static class TestClass_0332 {}
+    public static class TestClass_0333 {}
+    public static class TestClass_0334 {}
+    public static class TestClass_0335 {}
+    public static class TestClass_0336 {}
+    public static class TestClass_0337 {}
+    public static class TestClass_0338 {}
+    public static class TestClass_0339 {}
+    public static class TestClass_0340 {}
+    public static class TestClass_0341 {}
+    public static class TestClass_0342 {}
+    public static class TestClass_0343 {}
+    public static class TestClass_0344 {}
+    public static class TestClass_0345 {}
+    public static class TestClass_0346 {}
+    public static class TestClass_0347 {}
+    public static class TestClass_0348 {}
+    public static class TestClass_0349 {}
+    public static class TestClass_0350 {}
+    public static class TestClass_0351 {}
+    public static class TestClass_0352 {}
+    public static class TestClass_0353 {}
+    public static class TestClass_0354 {}
+    public static class TestClass_0355 {}
+    public static class TestClass_0356 {}
+    public static class TestClass_0357 {}
+    public static class TestClass_0358 {}
+    public static class TestClass_0359 {}
+    public static class TestClass_0360 {}
+    public static class TestClass_0361 {}
+    public static class TestClass_0362 {}
+    public static class TestClass_0363 {}
+    public static class TestClass_0364 {}
+    public static class TestClass_0365 {}
+    public static class TestClass_0366 {}
+    public static class TestClass_0367 {}
+    public static class TestClass_0368 {}
+    public static class TestClass_0369 {}
+    public static class TestClass_0370 {}
+    public static class TestClass_0371 {}
+    public static class TestClass_0372 {}
+    public static class TestClass_0373 {}
+    public static class TestClass_0374 {}
+    public static class TestClass_0375 {}
+    public static class TestClass_0376 {}
+    public static class TestClass_0377 {}
+    public static class TestClass_0378 {}
+    public static class TestClass_0379 {}
+    public static class TestClass_0380 {}
+    public static class TestClass_0381 {}
+    public static class TestClass_0382 {}
+    public static class TestClass_0383 {}
+    public static class TestClass_0384 {}
+    public static class TestClass_0385 {}
+    public static class TestClass_0386 {}
+    public static class TestClass_0387 {}
+    public static class TestClass_0388 {}
+    public static class TestClass_0389 {}
+    public static class TestClass_0390 {}
+    public static class TestClass_0391 {}
+    public static class TestClass_0392 {}
+    public static class TestClass_0393 {}
+    public static class TestClass_0394 {}
+    public static class TestClass_0395 {}
+    public static class TestClass_0396 {}
+    public static class TestClass_0397 {}
+    public static class TestClass_0398 {}
+    public static class TestClass_0399 {}
+    public static class TestClass_0400 {}
+    public static class TestClass_0401 {}
+    public static class TestClass_0402 {}
+    public static class TestClass_0403 {}
+    public static class TestClass_0404 {}
+    public static class TestClass_0405 {}
+    public static class TestClass_0406 {}
+    public static class TestClass_0407 {}
+    public static class TestClass_0408 {}
+    public static class TestClass_0409 {}
+    public static class TestClass_0410 {}
+    public static class TestClass_0411 {}
+    public static class TestClass_0412 {}
+    public static class TestClass_0413 {}
+    public static class TestClass_0414 {}
+    public static class TestClass_0415 {}
+    public static class TestClass_0416 {}
+    public static class TestClass_0417 {}
+    public static class TestClass_0418 {}
+    public static class TestClass_0419 {}
+    public static class TestClass_0420 {}
+    public static class TestClass_0421 {}
+    public static class TestClass_0422 {}
+    public static class TestClass_0423 {}
+    public static class TestClass_0424 {}
+    public static class TestClass_0425 {}
+    public static class TestClass_0426 {}
+    public static class TestClass_0427 {}
+    public static class TestClass_0428 {}
+    public static class TestClass_0429 {}
+    public static class TestClass_0430 {}
+    public static class TestClass_0431 {}
+    public static class TestClass_0432 {}
+    public static class TestClass_0433 {}
+    public static class TestClass_0434 {}
+    public static class TestClass_0435 {}
+    public static class TestClass_0436 {}
+    public static class TestClass_0437 {}
+    public static class TestClass_0438 {}
+    public static class TestClass_0439 {}
+    public static class TestClass_0440 {}
+    public static class TestClass_0441 {}
+    public static class TestClass_0442 {}
+    public static class TestClass_0443 {}
+    public static class TestClass_0444 {}
+    public static class TestClass_0445 {}
+    public static class TestClass_0446 {}
+    public static class TestClass_0447 {}
+    public static class TestClass_0448 {}
+    public static class TestClass_0449 {}
+    public static class TestClass_0450 {}
+    public static class TestClass_0451 {}
+    public static class TestClass_0452 {}
+    public static class TestClass_0453 {}
+    public static class TestClass_0454 {}
+    public static class TestClass_0455 {}
+    public static class TestClass_0456 {}
+    public static class TestClass_0457 {}
+    public static class TestClass_0458 {}
+    public static class TestClass_0459 {}
+    public static class TestClass_0460 {}
+    public static class TestClass_0461 {}
+    public static class TestClass_0462 {}
+    public static class TestClass_0463 {}
+    public static class TestClass_0464 {}
+    public static class TestClass_0465 {}
+    public static class TestClass_0466 {}
+    public static class TestClass_0467 {}
+    public static class TestClass_0468 {}
+    public static class TestClass_0469 {}
+    public static class TestClass_0470 {}
+    public static class TestClass_0471 {}
+    public static class TestClass_0472 {}
+    public static class TestClass_0473 {}
+    public static class TestClass_0474 {}
+    public static class TestClass_0475 {}
+    public static class TestClass_0476 {}
+    public static class TestClass_0477 {}
+    public static class TestClass_0478 {}
+    public static class TestClass_0479 {}
+    public static class TestClass_0480 {}
+    public static class TestClass_0481 {}
+    public static class TestClass_0482 {}
+    public static class TestClass_0483 {}
+    public static class TestClass_0484 {}
+    public static class TestClass_0485 {}
+    public static class TestClass_0486 {}
+    public static class TestClass_0487 {}
+    public static class TestClass_0488 {}
+    public static class TestClass_0489 {}
+    public static class TestClass_0490 {}
+    public static class TestClass_0491 {}
+    public static class TestClass_0492 {}
+    public static class TestClass_0493 {}
+    public static class TestClass_0494 {}
+    public static class TestClass_0495 {}
+    public static class TestClass_0496 {}
+    public static class TestClass_0497 {}
+    public static class TestClass_0498 {}
+    public static class TestClass_0499 {}
+    public static class TestClass_0500 {}
+    public static class TestClass_0501 {}
+    public static class TestClass_0502 {}
+    public static class TestClass_0503 {}
+    public static class TestClass_0504 {}
+    public static class TestClass_0505 {}
+    public static class TestClass_0506 {}
+    public static class TestClass_0507 {}
+    public static class TestClass_0508 {}
+    public static class TestClass_0509 {}
+    public static class TestClass_0510 {}
+    public static class TestClass_0511 {}
+    public static class TestClass_0512 {}
+    public static class TestClass_0513 {}
+    public static class TestClass_0514 {}
+    public static class TestClass_0515 {}
+    public static class TestClass_0516 {}
+    public static class TestClass_0517 {}
+    public static class TestClass_0518 {}
+    public static class TestClass_0519 {}
+    public static class TestClass_0520 {}
+    public static class TestClass_0521 {}
+    public static class TestClass_0522 {}
+    public static class TestClass_0523 {}
+    public static class TestClass_0524 {}
+    public static class TestClass_0525 {}
+    public static class TestClass_0526 {}
+    public static class TestClass_0527 {}
+    public static class TestClass_0528 {}
+    public static class TestClass_0529 {}
+    public static class TestClass_0530 {}
+    public static class TestClass_0531 {}
+    public static class TestClass_0532 {}
+    public static class TestClass_0533 {}
+    public static class TestClass_0534 {}
+    public static class TestClass_0535 {}
+    public static class TestClass_0536 {}
+    public static class TestClass_0537 {}
+    public static class TestClass_0538 {}
+    public static class TestClass_0539 {}
+    public static class TestClass_0540 {}
+    public static class TestClass_0541 {}
+    public static class TestClass_0542 {}
+    public static class TestClass_0543 {}
+    public static class TestClass_0544 {}
+    public static class TestClass_0545 {}
+    public static class TestClass_0546 {}
+    public static class TestClass_0547 {}
+    public static class TestClass_0548 {}
+    public static class TestClass_0549 {}
+    public static class TestClass_0550 {}
+    public static class TestClass_0551 {}
+    public static class TestClass_0552 {}
+    public static class TestClass_0553 {}
+    public static class TestClass_0554 {}
+    public static class TestClass_0555 {}
+    public static class TestClass_0556 {}
+    public static class TestClass_0557 {}
+    public static class TestClass_0558 {}
+    public static class TestClass_0559 {}
+    public static class TestClass_0560 {}
+    public static class TestClass_0561 {}
+    public static class TestClass_0562 {}
+    public static class TestClass_0563 {}
+    public static class TestClass_0564 {}
+    public static class TestClass_0565 {}
+    public static class TestClass_0566 {}
+    public static class TestClass_0567 {}
+    public static class TestClass_0568 {}
+    public static class TestClass_0569 {}
+    public static class TestClass_0570 {}
+    public static class TestClass_0571 {}
+    public static class TestClass_0572 {}
+    public static class TestClass_0573 {}
+    public static class TestClass_0574 {}
+    public static class TestClass_0575 {}
+    public static class TestClass_0576 {}
+    public static class TestClass_0577 {}
+    public static class TestClass_0578 {}
+    public static class TestClass_0579 {}
+    public static class TestClass_0580 {}
+    public static class TestClass_0581 {}
+    public static class TestClass_0582 {}
+    public static class TestClass_0583 {}
+    public static class TestClass_0584 {}
+    public static class TestClass_0585 {}
+    public static class TestClass_0586 {}
+    public static class TestClass_0587 {}
+    public static class TestClass_0588 {}
+    public static class TestClass_0589 {}
+    public static class TestClass_0590 {}
+    public static class TestClass_0591 {}
+    public static class TestClass_0592 {}
+    public static class TestClass_0593 {}
+    public static class TestClass_0594 {}
+    public static class TestClass_0595 {}
+    public static class TestClass_0596 {}
+    public static class TestClass_0597 {}
+    public static class TestClass_0598 {}
+    public static class TestClass_0599 {}
+    public static class TestClass_0600 {}
+    public static class TestClass_0601 {}
+    public static class TestClass_0602 {}
+    public static class TestClass_0603 {}
+    public static class TestClass_0604 {}
+    public static class TestClass_0605 {}
+    public static class TestClass_0606 {}
+    public static class TestClass_0607 {}
+    public static class TestClass_0608 {}
+    public static class TestClass_0609 {}
+    public static class TestClass_0610 {}
+    public static class TestClass_0611 {}
+    public static class TestClass_0612 {}
+    public static class TestClass_0613 {}
+    public static class TestClass_0614 {}
+    public static class TestClass_0615 {}
+    public static class TestClass_0616 {}
+    public static class TestClass_0617 {}
+    public static class TestClass_0618 {}
+    public static class TestClass_0619 {}
+    public static class TestClass_0620 {}
+    public static class TestClass_0621 {}
+    public static class TestClass_0622 {}
+    public static class TestClass_0623 {}
+    public static class TestClass_0624 {}
+    public static class TestClass_0625 {}
+    public static class TestClass_0626 {}
+    public static class TestClass_0627 {}
+    public static class TestClass_0628 {}
+    public static class TestClass_0629 {}
+    public static class TestClass_0630 {}
+    public static class TestClass_0631 {}
+    public static class TestClass_0632 {}
+    public static class TestClass_0633 {}
+    public static class TestClass_0634 {}
+    public static class TestClass_0635 {}
+    public static class TestClass_0636 {}
+    public static class TestClass_0637 {}
+    public static class TestClass_0638 {}
+    public static class TestClass_0639 {}
+    public static class TestClass_0640 {}
+    public static class TestClass_0641 {}
+    public static class TestClass_0642 {}
+    public static class TestClass_0643 {}
+    public static class TestClass_0644 {}
+    public static class TestClass_0645 {}
+    public static class TestClass_0646 {}
+    public static class TestClass_0647 {}
+    public static class TestClass_0648 {}
+    public static class TestClass_0649 {}
+    public static class TestClass_0650 {}
+    public static class TestClass_0651 {}
+    public static class TestClass_0652 {}
+    public static class TestClass_0653 {}
+    public static class TestClass_0654 {}
+    public static class TestClass_0655 {}
+    public static class TestClass_0656 {}
+    public static class TestClass_0657 {}
+    public static class TestClass_0658 {}
+    public static class TestClass_0659 {}
+    public static class TestClass_0660 {}
+    public static class TestClass_0661 {}
+    public static class TestClass_0662 {}
+    public static class TestClass_0663 {}
+    public static class TestClass_0664 {}
+    public static class TestClass_0665 {}
+    public static class TestClass_0666 {}
+    public static class TestClass_0667 {}
+    public static class TestClass_0668 {}
+    public static class TestClass_0669 {}
+    public static class TestClass_0670 {}
+    public static class TestClass_0671 {}
+    public static class TestClass_0672 {}
+    public static class TestClass_0673 {}
+    public static class TestClass_0674 {}
+    public static class TestClass_0675 {}
+    public static class TestClass_0676 {}
+    public static class TestClass_0677 {}
+    public static class TestClass_0678 {}
+    public static class TestClass_0679 {}
+    public static class TestClass_0680 {}
+    public static class TestClass_0681 {}
+    public static class TestClass_0682 {}
+    public static class TestClass_0683 {}
+    public static class TestClass_0684 {}
+    public static class TestClass_0685 {}
+    public static class TestClass_0686 {}
+    public static class TestClass_0687 {}
+    public static class TestClass_0688 {}
+    public static class TestClass_0689 {}
+    public static class TestClass_0690 {}
+    public static class TestClass_0691 {}
+    public static class TestClass_0692 {}
+    public static class TestClass_0693 {}
+    public static class TestClass_0694 {}
+    public static class TestClass_0695 {}
+    public static class TestClass_0696 {}
+    public static class TestClass_0697 {}
+    public static class TestClass_0698 {}
+    public static class TestClass_0699 {}
+    public static class TestClass_0700 {}
+    public static class TestClass_0701 {}
+    public static class TestClass_0702 {}
+    public static class TestClass_0703 {}
+    public static class TestClass_0704 {}
+    public static class TestClass_0705 {}
+    public static class TestClass_0706 {}
+    public static class TestClass_0707 {}
+    public static class TestClass_0708 {}
+    public static class TestClass_0709 {}
+    public static class TestClass_0710 {}
+    public static class TestClass_0711 {}
+    public static class TestClass_0712 {}
+    public static class TestClass_0713 {}
+    public static class TestClass_0714 {}
+    public static class TestClass_0715 {}
+    public static class TestClass_0716 {}
+    public static class TestClass_0717 {}
+    public static class TestClass_0718 {}
+    public static class TestClass_0719 {}
+    public static class TestClass_0720 {}
+    public static class TestClass_0721 {}
+    public static class TestClass_0722 {}
+    public static class TestClass_0723 {}
+    public static class TestClass_0724 {}
+    public static class TestClass_0725 {}
+    public static class TestClass_0726 {}
+    public static class TestClass_0727 {}
+    public static class TestClass_0728 {}
+    public static class TestClass_0729 {}
+    public static class TestClass_0730 {}
+    public static class TestClass_0731 {}
+    public static class TestClass_0732 {}
+    public static class TestClass_0733 {}
+    public static class TestClass_0734 {}
+    public static class TestClass_0735 {}
+    public static class TestClass_0736 {}
+    public static class TestClass_0737 {}
+    public static class TestClass_0738 {}
+    public static class TestClass_0739 {}
+    public static class TestClass_0740 {}
+    public static class TestClass_0741 {}
+    public static class TestClass_0742 {}
+    public static class TestClass_0743 {}
+    public static class TestClass_0744 {}
+    public static class TestClass_0745 {}
+    public static class TestClass_0746 {}
+    public static class TestClass_0747 {}
+    public static class TestClass_0748 {}
+    public static class TestClass_0749 {}
+    public static class TestClass_0750 {}
+    public static class TestClass_0751 {}
+    public static class TestClass_0752 {}
+    public static class TestClass_0753 {}
+    public static class TestClass_0754 {}
+    public static class TestClass_0755 {}
+    public static class TestClass_0756 {}
+    public static class TestClass_0757 {}
+    public static class TestClass_0758 {}
+    public static class TestClass_0759 {}
+    public static class TestClass_0760 {}
+    public static class TestClass_0761 {}
+    public static class TestClass_0762 {}
+    public static class TestClass_0763 {}
+    public static class TestClass_0764 {}
+    public static class TestClass_0765 {}
+    public static class TestClass_0766 {}
+    public static class TestClass_0767 {}
+    public static class TestClass_0768 {}
+    public static class TestClass_0769 {}
+    public static class TestClass_0770 {}
+    public static class TestClass_0771 {}
+    public static class TestClass_0772 {}
+    public static class TestClass_0773 {}
+    public static class TestClass_0774 {}
+    public static class TestClass_0775 {}
+    public static class TestClass_0776 {}
+    public static class TestClass_0777 {}
+    public static class TestClass_0778 {}
+    public static class TestClass_0779 {}
+    public static class TestClass_0780 {}
+    public static class TestClass_0781 {}
+    public static class TestClass_0782 {}
+    public static class TestClass_0783 {}
+    public static class TestClass_0784 {}
+    public static class TestClass_0785 {}
+    public static class TestClass_0786 {}
+    public static class TestClass_0787 {}
+    public static class TestClass_0788 {}
+    public static class TestClass_0789 {}
+    public static class TestClass_0790 {}
+    public static class TestClass_0791 {}
+    public static class TestClass_0792 {}
+    public static class TestClass_0793 {}
+    public static class TestClass_0794 {}
+    public static class TestClass_0795 {}
+    public static class TestClass_0796 {}
+    public static class TestClass_0797 {}
+    public static class TestClass_0798 {}
+    public static class TestClass_0799 {}
+    public static class TestClass_0800 {}
+    public static class TestClass_0801 {}
+    public static class TestClass_0802 {}
+    public static class TestClass_0803 {}
+    public static class TestClass_0804 {}
+    public static class TestClass_0805 {}
+    public static class TestClass_0806 {}
+    public static class TestClass_0807 {}
+    public static class TestClass_0808 {}
+    public static class TestClass_0809 {}
+    public static class TestClass_0810 {}
+    public static class TestClass_0811 {}
+    public static class TestClass_0812 {}
+    public static class TestClass_0813 {}
+    public static class TestClass_0814 {}
+    public static class TestClass_0815 {}
+    public static class TestClass_0816 {}
+    public static class TestClass_0817 {}
+    public static class TestClass_0818 {}
+    public static class TestClass_0819 {}
+    public static class TestClass_0820 {}
+    public static class TestClass_0821 {}
+    public static class TestClass_0822 {}
+    public static class TestClass_0823 {}
+    public static class TestClass_0824 {}
+    public static class TestClass_0825 {}
+    public static class TestClass_0826 {}
+    public static class TestClass_0827 {}
+    public static class TestClass_0828 {}
+    public static class TestClass_0829 {}
+    public static class TestClass_0830 {}
+    public static class TestClass_0831 {}
+    public static class TestClass_0832 {}
+    public static class TestClass_0833 {}
+    public static class TestClass_0834 {}
+    public static class TestClass_0835 {}
+    public static class TestClass_0836 {}
+    public static class TestClass_0837 {}
+    public static class TestClass_0838 {}
+    public static class TestClass_0839 {}
+    public static class TestClass_0840 {}
+    public static class TestClass_0841 {}
+    public static class TestClass_0842 {}
+    public static class TestClass_0843 {}
+    public static class TestClass_0844 {}
+    public static class TestClass_0845 {}
+    public static class TestClass_0846 {}
+    public static class TestClass_0847 {}
+    public static class TestClass_0848 {}
+    public static class TestClass_0849 {}
+    public static class TestClass_0850 {}
+    public static class TestClass_0851 {}
+    public static class TestClass_0852 {}
+    public static class TestClass_0853 {}
+    public static class TestClass_0854 {}
+    public static class TestClass_0855 {}
+    public static class TestClass_0856 {}
+    public static class TestClass_0857 {}
+    public static class TestClass_0858 {}
+    public static class TestClass_0859 {}
+    public static class TestClass_0860 {}
+    public static class TestClass_0861 {}
+    public static class TestClass_0862 {}
+    public static class TestClass_0863 {}
+    public static class TestClass_0864 {}
+    public static class TestClass_0865 {}
+    public static class TestClass_0866 {}
+    public static class TestClass_0867 {}
+    public static class TestClass_0868 {}
+    public static class TestClass_0869 {}
+    public static class TestClass_0870 {}
+    public static class TestClass_0871 {}
+    public static class TestClass_0872 {}
+    public static class TestClass_0873 {}
+    public static class TestClass_0874 {}
+    public static class TestClass_0875 {}
+    public static class TestClass_0876 {}
+    public static class TestClass_0877 {}
+    public static class TestClass_0878 {}
+    public static class TestClass_0879 {}
+    public static class TestClass_0880 {}
+    public static class TestClass_0881 {}
+    public static class TestClass_0882 {}
+    public static class TestClass_0883 {}
+    public static class TestClass_0884 {}
+    public static class TestClass_0885 {}
+    public static class TestClass_0886 {}
+    public static class TestClass_0887 {}
+    public static class TestClass_0888 {}
+    public static class TestClass_0889 {}
+    public static class TestClass_0890 {}
+    public static class TestClass_0891 {}
+    public static class TestClass_0892 {}
+    public static class TestClass_0893 {}
+    public static class TestClass_0894 {}
+    public static class TestClass_0895 {}
+    public static class TestClass_0896 {}
+    public static class TestClass_0897 {}
+    public static class TestClass_0898 {}
+    public static class TestClass_0899 {}
+    public static class TestClass_0900 {}
+    public static class TestClass_0901 {}
+    public static class TestClass_0902 {}
+    public static class TestClass_0903 {}
+    public static class TestClass_0904 {}
+    public static class TestClass_0905 {}
+    public static class TestClass_0906 {}
+    public static class TestClass_0907 {}
+    public static class TestClass_0908 {}
+    public static class TestClass_0909 {}
+    public static class TestClass_0910 {}
+    public static class TestClass_0911 {}
+    public static class TestClass_0912 {}
+    public static class TestClass_0913 {}
+    public static class TestClass_0914 {}
+    public static class TestClass_0915 {}
+    public static class TestClass_0916 {}
+    public static class TestClass_0917 {}
+    public static class TestClass_0918 {}
+    public static class TestClass_0919 {}
+    public static class TestClass_0920 {}
+    public static class TestClass_0921 {}
+    public static class TestClass_0922 {}
+    public static class TestClass_0923 {}
+    public static class TestClass_0924 {}
+    public static class TestClass_0925 {}
+    public static class TestClass_0926 {}
+    public static class TestClass_0927 {}
+    public static class TestClass_0928 {}
+    public static class TestClass_0929 {}
+    public static class TestClass_0930 {}
+    public static class TestClass_0931 {}
+    public static class TestClass_0932 {}
+    public static class TestClass_0933 {}
+    public static class TestClass_0934 {}
+    public static class TestClass_0935 {}
+    public static class TestClass_0936 {}
+    public static class TestClass_0937 {}
+    public static class TestClass_0938 {}
+    public static class TestClass_0939 {}
+    public static class TestClass_0940 {}
+    public static class TestClass_0941 {}
+    public static class TestClass_0942 {}
+    public static class TestClass_0943 {}
+    public static class TestClass_0944 {}
+    public static class TestClass_0945 {}
+    public static class TestClass_0946 {}
+    public static class TestClass_0947 {}
+    public static class TestClass_0948 {}
+    public static class TestClass_0949 {}
+    public static class TestClass_0950 {}
+    public static class TestClass_0951 {}
+    public static class TestClass_0952 {}
+    public static class TestClass_0953 {}
+    public static class TestClass_0954 {}
+    public static class TestClass_0955 {}
+    public static class TestClass_0956 {}
+    public static class TestClass_0957 {}
+    public static class TestClass_0958 {}
+    public static class TestClass_0959 {}
+    public static class TestClass_0960 {}
+    public static class TestClass_0961 {}
+    public static class TestClass_0962 {}
+    public static class TestClass_0963 {}
+    public static class TestClass_0964 {}
+    public static class TestClass_0965 {}
+    public static class TestClass_0966 {}
+    public static class TestClass_0967 {}
+    public static class TestClass_0968 {}
+    public static class TestClass_0969 {}
+    public static class TestClass_0970 {}
+    public static class TestClass_0971 {}
+    public static class TestClass_0972 {}
+    public static class TestClass_0973 {}
+    public static class TestClass_0974 {}
+    public static class TestClass_0975 {}
+    public static class TestClass_0976 {}
+    public static class TestClass_0977 {}
+    public static class TestClass_0978 {}
+    public static class TestClass_0979 {}
+    public static class TestClass_0980 {}
+    public static class TestClass_0981 {}
+    public static class TestClass_0982 {}
+    public static class TestClass_0983 {}
+    public static class TestClass_0984 {}
+    public static class TestClass_0985 {}
+    public static class TestClass_0986 {}
+    public static class TestClass_0987 {}
+    public static class TestClass_0988 {}
+    public static class TestClass_0989 {}
+    public static class TestClass_0990 {}
+    public static class TestClass_0991 {}
+    public static class TestClass_0992 {}
+    public static class TestClass_0993 {}
+    public static class TestClass_0994 {}
+    public static class TestClass_0995 {}
+    public static class TestClass_0996 {}
+    public static class TestClass_0997 {}
+    public static class TestClass_0998 {}
+    public static class TestClass_0999 {}
+    public static class TestClass_1000 {}
+    public static class TestClass_1001 {}
+    public static class TestClass_1002 {}
+    public static class TestClass_1003 {}
+    public static class TestClass_1004 {}
+    public static class TestClass_1005 {}
+    public static class TestClass_1006 {}
+    public static class TestClass_1007 {}
+    public static class TestClass_1008 {}
+    public static class TestClass_1009 {}
+    public static class TestClass_1010 {}
+    public static class TestClass_1011 {}
+    public static class TestClass_1012 {}
+    public static class TestClass_1013 {}
+    public static class TestClass_1014 {}
+    public static class TestClass_1015 {}
+    public static class TestClass_1016 {}
+    public static class TestClass_1017 {}
+    public static class TestClass_1018 {}
+    public static class TestClass_1019 {}
+    public static class TestClass_1020 {}
+    public static class TestClass_1021 {}
+    public static class TestClass_1022 {}
+    public static class TestClass_1023 {}  // Used by timeConstClassWithoutConflict.
+    public static class TestClass_1024 {}  // Used by timeConstClassWithConflict.
+
+    public void timeConstClassWithConflict(int count) {  // Runs the loop body count times.
+        Class<?> class0001 = TestClass_0001.class;  // Loaded once, before the timed loop.
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo(class0001);  // Prevent LICM on the TestClass_xxxx.class below.
+            $noinline$foo(TestClass_0000.class);  // Per the note at the top of the class, 0000 and
+            $noinline$foo(TestClass_1024.class);  // 1024 hash alike: n == n + 1024 (mod 2^10).
+        }
+    }
+
+    public void timeConstClassWithoutConflict(int count) {  // Runs the loop body count times.
+        Class<?> class0000 = TestClass_0000.class;  // Loaded once, before the timed loop.
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo(class0000);  // Prevent LICM on the TestClass_xxxx.class below.
+            $noinline$foo(TestClass_0001.class);  // Per the note at the top of the class, 0001 and
+            $noinline$foo(TestClass_1023.class);  // 1023 differ: n + 1 != n + 1023 (mod 2^10).
+        }
+    }
+
+    static void $noinline$foo(Class<?> s) {  // Call target for the benchmarks; s is not read.
+        if (doThrow) { throw new Error(); }  // Not taken while doThrow keeps its initial false.
+    }
+
+    public static boolean doThrow = false;  // Non-final flag read by $noinline$foo.
+}
diff --git a/benchmark/const-string/src/ConstStringBenchmark.java b/benchmark/const-string/src/ConstStringBenchmark.java
index 2beb0a4..2359a5f 100644
--- a/benchmark/const-string/src/ConstStringBenchmark.java
+++ b/benchmark/const-string/src/ConstStringBenchmark.java
@@ -18,6 +18,7 @@
     // Initialize 1025 strings with consecutive string indexes in the dex file.
     // The tests below rely on the knowledge that ART uses the low 10 bits
     // of the string index as the hash into DexCache strings array.
+    // Note: n == n + 1024 (mod 2^10), n + 1 != n + 1023 (mod 2^10).
     public static final String string_0000 = "TestString_0000";
     public static final String string_0001 = "TestString_0001";
     public static final String string_0002 = "TestString_0002";
@@ -1045,21 +1046,21 @@
     public static final String string_1024 = "TestString_1024";
 
     public void timeConstStringsWithConflict(int count) {
-      for (int i = 0; i < count; ++i) {
-        $noinline$foo("TestString_0000");
-        $noinline$foo("TestString_1024");
-      }
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo("TestString_0000");
+            $noinline$foo("TestString_1024");
+        }
     }
 
     public void timeConstStringsWithoutConflict(int count) {
-      for (int i = 0; i < count; ++i) {
-        $noinline$foo("TestString_0001");
-        $noinline$foo("TestString_1023");
-      }
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo("TestString_0001");
+            $noinline$foo("TestString_1023");
+        }
     }
 
     static void $noinline$foo(String s) {
-      if (doThrow) { throw new Error(); }
+        if (doThrow) { throw new Error(); }
     }
 
     public static boolean doThrow = false;
diff --git a/benchmark/string-indexof/info.txt b/benchmark/string-indexof/info.txt
new file mode 100644
index 0000000..cc04217
--- /dev/null
+++ b/benchmark/string-indexof/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating String.indexOf() instructions in a loop.
diff --git a/benchmark/string-indexof/src/StringIndexOfBenchmark.java b/benchmark/string-indexof/src/StringIndexOfBenchmark.java
new file mode 100644
index 0000000..481a27a
--- /dev/null
+++ b/benchmark/string-indexof/src/StringIndexOfBenchmark.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class StringIndexOfBenchmark {  // Each timeIndexOfX repeats one lookup in string36.
+    public static final String string36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";  // length = 36
+
+    public void timeIndexOf0(int count) {
+        final char c = '0';  // Occurs in string36 at index 0.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf1(int count) {
+        final char c = '1';  // Occurs in string36 at index 1.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf2(int count) {
+        final char c = '2';  // Occurs in string36 at index 2.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf3(int count) {
+        final char c = '3';  // Occurs in string36 at index 3.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf4(int count) {
+        final char c = '4';  // Occurs in string36 at index 4.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf7(int count) {
+        final char c = '7';  // Occurs in string36 at index 7.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf8(int count) {
+        final char c = '8';  // Occurs in string36 at index 8.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfF(int count) {
+        final char c = 'F';  // Occurs in string36 at index 15.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfG(int count) {
+        final char c = 'G';  // Occurs in string36 at index 16.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfV(int count) {
+        final char c = 'V';  // Occurs in string36 at index 31.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfW(int count) {
+        final char c = 'W';  // Occurs in string36 at index 32.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf_(int count) {
+        final char c = '_';  // Does not occur in string36, so indexOf returns -1.
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    static int $noinline$indexOf(String s, char c) {  // Wraps s.indexOf(c) in a separate call.
+        if (doThrow) { throw new Error(); }  // Not taken while doThrow keeps its initial false.
+        return s.indexOf(c);
+    }
+
+    public static boolean doThrow = false;  // Non-final flag read by $noinline$indexOf.
+}
diff --git a/build/Android.bp b/build/Android.bp
index 9156027..cd9d74a 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -145,6 +145,10 @@
         "external/vixl/src",
         "external/zlib",
     ],
+
+    tidy_checks: [
+        "-google-default-arguments",
+    ],
 }
 
 art_debug_defaults {
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index d2e3371..291db8b 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -124,12 +124,17 @@
 ART_TEST_RUN_TEST_MULTI_IMAGE ?= $(ART_TEST_FULL)
 
 # Define the command run on test failure. $(1) is the name of the test. Executed by the shell.
+# If the test was a top-level make target (e.g. `test-art-host-gtest-codegen_test64`), the command
+# fails with exit status 1 (returned by the last `grep` statement below).
+# Otherwise (e.g., if the test was run as a prerequisite of a compound test command, such as
+# `test-art-host-gtest-codegen_test`), the command does not fail, as this would break rules running
+# ART_TEST_PREREQ_FINISHED as one of their actions, which expects *all* prerequisites *not* to fail.
 define ART_TEST_FAILED
   ( [ -f $(ART_HOST_TEST_DIR)/skipped/$(1) ] || \
     (mkdir -p $(ART_HOST_TEST_DIR)/failed/ && touch $(ART_HOST_TEST_DIR)/failed/$(1) && \
       echo $(ART_TEST_KNOWN_FAILING) | grep -q $(1) \
         && (echo -e "$(1) \e[91mKNOWN FAILURE\e[0m") \
-        || (echo -e "$(1) \e[91mFAILED\e[0m" >&2 )))
+        || (echo -e "$(1) \e[91mFAILED\e[0m" >&2; echo $(MAKECMDGOALS) | grep -q -v $(1))))
 endef
 
 ifeq ($(ART_TEST_QUIET),true)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 4f273e5..1691dbb 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -25,6 +25,7 @@
 GTEST_DEX_DIRECTORIES := \
   AbstractMethod \
   AllFields \
+  DexToDexDecompiler \
   ExceptionHandle \
   GetMethodSignature \
   ImageLayoutA \
@@ -83,6 +84,7 @@
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 
+ART_GTEST_atomic_method_ref_map_test_DEX_DEPS := Interfaces
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes
@@ -105,7 +107,8 @@
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
-ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps
+ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps MultiDex
+ART_GTEST_dex_to_dex_decompiler_test_DEX_DEPS := VerifierDeps DexToDexDecompiler
 
 # The elf writer test has dependencies on core.oat.
 ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_IMAGE_optimizing_no-pic_64) $(HOST_CORE_IMAGE_optimizing_no-pic_32)
@@ -114,10 +117,14 @@
 ART_GTEST_dex2oat_environment_tests_HOST_DEPS := \
   $(HOST_CORE_IMAGE_optimizing_pic_64) \
   $(HOST_CORE_IMAGE_optimizing_pic_32) \
+  $(HOST_CORE_IMAGE_optimizing_no-pic_64) \
+  $(HOST_CORE_IMAGE_optimizing_no-pic_32) \
   $(HOST_OUT_EXECUTABLES)/patchoatd
 ART_GTEST_dex2oat_environment_tests_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_optimizing_pic_64) \
   $(TARGET_CORE_IMAGE_optimizing_pic_32) \
+  $(TARGET_CORE_IMAGE_optimizing_no-pic_64) \
+  $(TARGET_CORE_IMAGE_optimizing_no-pic_32) \
   $(TARGET_OUT_EXECUTABLES)/patchoatd
 
 ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3b273a2..e297b4f 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -215,24 +215,9 @@
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name)
     else
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
-      ifdef ART_USE_VIXL_ARM_BACKEND
-        ifeq ($(1),optimizing)
-          # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
-          # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is
-          # defined.
-          core_compile_options += --compiler-filter=interpret-only
-        endif
-      endif
     endif
   else
     $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
-    ifdef ART_USE_VIXL_ARM_BACKEND
-      ifeq ($(1),optimizing)
-      # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
-      # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is defined.
-      core_compile_options += --compiler-filter=interpret-only
-      endif
-    endif
   endif
   $(4)TARGET_CORE_IMG_OUTS += $$(core_image_name)
   $(4)TARGET_CORE_OAT_OUTS += $$(core_oat_name)
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index dec9c83..6e042c3 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -234,7 +234,7 @@
     // Checks for --boot-image location.
     {
       std::string boot_image_location = boot_image_location_;
-      size_t file_name_idx = boot_image_location.rfind("/");
+      size_t file_name_idx = boot_image_location.rfind('/');
       if (file_name_idx == std::string::npos) {  // Prevent a InsertIsaDirectory check failure.
         *error_msg = "Boot image location must have a / in it";
         return false;
@@ -244,7 +244,7 @@
       // This prevents a common error "Could not create an image space..." when initing the Runtime.
       if (file_name_idx != std::string::npos) {
         std::string no_file_name = boot_image_location.substr(0, file_name_idx);
-        size_t ancestor_dirs_idx = no_file_name.rfind("/");
+        size_t ancestor_dirs_idx = no_file_name.rfind('/');
 
         std::string parent_dir_name;
         if (ancestor_dirs_idx != std::string::npos) {
diff --git a/cmdline/cmdline_parser.h b/cmdline/cmdline_parser.h
index cfc0967..d82fd48 100644
--- a/cmdline/cmdline_parser.h
+++ b/cmdline/cmdline_parser.h
@@ -390,7 +390,7 @@
         // Unlike regular argument definitions, when a value gets parsed into its
         // stronger type, we just throw it away.
 
-        if (ign.find("_") != std::string::npos) {  // Does the arg-def have a wildcard?
+        if (ign.find('_') != std::string::npos) {  // Does the arg-def have a wildcard?
           // pretend this is a string, e.g. -Xjitconfig:<anythinggoeshere>
           auto&& builder = Define(ignore_name).template WithType<std::string>().IntoIgnore();
           assert(&builder == this);
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index cad5104..550e8c4 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -78,7 +78,7 @@
     return memcmp(std::addressof(expected), std::addressof(actual), sizeof(expected)) == 0;
   }
 
-  bool UsuallyEquals(const char* expected, std::string actual) {
+  bool UsuallyEquals(const char* expected, const std::string& actual) {
     return std::string(expected) == actual;
   }
 
@@ -129,7 +129,7 @@
     parser_ = ParsedOptions::MakeParser(false);  // do not ignore unrecognized options
   }
 
-  static ::testing::AssertionResult IsResultSuccessful(CmdlineResult result) {
+  static ::testing::AssertionResult IsResultSuccessful(const CmdlineResult& result) {
     if (result.IsSuccess()) {
       return ::testing::AssertionSuccess();
     } else {
@@ -138,7 +138,7 @@
     }
   }
 
-  static ::testing::AssertionResult IsResultFailure(CmdlineResult result,
+  static ::testing::AssertionResult IsResultFailure(const CmdlineResult& result,
                                                     CmdlineResult::Status failure_status) {
     if (result.IsSuccess()) {
       return ::testing::AssertionFailure() << " got success but expected failure: "
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 13a3235..156ca9e 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -182,7 +182,7 @@
 struct CmdlineType<Memory<Divisor>> : CmdlineTypeParser<Memory<Divisor>> {
   using typename CmdlineTypeParser<Memory<Divisor>>::Result;
 
-  Result Parse(const std::string arg) {
+  Result Parse(const std::string& arg) {
     CMDLINE_DEBUG_LOG << "Parsing memory: " << arg << std::endl;
     size_t val = ParseMemoryOption(arg.c_str(), Divisor);
     CMDLINE_DEBUG_LOG << "Memory parsed to size_t value: " << val << std::endl;
@@ -496,11 +496,7 @@
 struct XGcOption {
   // These defaults are used when the command line arguments for -Xgc:
   // are either omitted completely or partially.
-  gc::CollectorType collector_type_ = kUseReadBarrier ?
-                                           // If RB is enabled (currently a build-time decision),
-                                           // use CC as the default GC.
-                                           gc::kCollectorTypeCC :
-                                           gc::kCollectorTypeDefault;
+  gc::CollectorType collector_type_ = gc::kCollectorTypeDefault;
   bool verify_pre_gc_heap_ = false;
   bool verify_pre_sweeping_heap_ = kIsDebugBuild;
   bool verify_post_gc_heap_ = false;
@@ -580,10 +576,6 @@
     : background_collector_type_(background_collector_type) {}
   BackgroundGcOption()
     : background_collector_type_(gc::kCollectorTypeNone) {
-
-    if (kUseReadBarrier) {
-      background_collector_type_ = gc::kCollectorTypeCCBackground;  // Background compaction for CC.
-    }
   }
 
   operator gc::CollectorType() const { return background_collector_type_; }
@@ -696,7 +688,7 @@
   }
 
   static std::string RemovePrefix(const std::string& source) {
-    size_t prefix_idx = source.find(":");
+    size_t prefix_idx = source.find(':');
 
     if (prefix_idx == std::string::npos) {
       return "";
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 84beff5..14eac30 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -108,7 +108,7 @@
       // If this is true, then the wildcard matching later on can still fail, so this is not
       // a guarantee that the argument is correct, it's more of a strong hint that the
       // user-provided input *probably* was trying to match this argument.
-      size_t MaybeMatches(TokenRange token_list) const {
+      size_t MaybeMatches(const TokenRange& token_list) const {
         auto best_match = FindClosestMatch(token_list);
 
         return best_match.second;
@@ -118,7 +118,7 @@
       //
       // Returns the token range that was the closest match and the # of tokens that
       // this range was matched up until.
-      std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
+      std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const {
         const TokenRange* best_match_ptr = nullptr;
 
         size_t best_match = 0;
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 0b14859..db55ea0 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -28,6 +28,7 @@
         "compiled_method.cc",
         "debug/elf_debug_writer.cc",
         "dex/dex_to_dex_compiler.cc",
+        "dex/dex_to_dex_decompiler.cc",
         "dex/verified_method.cc",
         "dex/verification_results.cc",
         "dex/quick_compiler_callbacks.cc",
@@ -42,6 +43,7 @@
         "linker/vector_output_stream.cc",
         "linker/relative_patcher.cc",
         "jit/jit_compiler.cc",
+        "jit/jit_logger.cc",
         "jni/quick/calling_convention.cc",
         "jni/quick/jni_compiler.cc",
         "optimizing/block_builder.cc",
@@ -51,6 +53,7 @@
         "optimizing/code_generator_utils.cc",
         "optimizing/constant_folding.cc",
         "optimizing/dead_code_elimination.cc",
+        "optimizing/escape.cc",
         "optimizing/graph_checker.cc",
         "optimizing/graph_visualizer.cc",
         "optimizing/gvn.cc",
@@ -105,6 +108,7 @@
                 "optimizing/instruction_simplifier_arm.cc",
                 "optimizing/instruction_simplifier_shared.cc",
                 "optimizing/intrinsics_arm.cc",
+                "optimizing/intrinsics_arm_vixl.cc",
                 "utils/arm/assembler_arm.cc",
                 "utils/arm/assembler_arm_vixl.cc",
                 "utils/arm/assembler_thumb2.cc",
@@ -203,7 +207,8 @@
 
 gensrcs {
     name: "art_compiler_operator_srcs",
-    cmd: "art/tools/generate-operator-out.py art/compiler $in > $out",
+    cmd: "$(location generate-operator-out.py) art/compiler $(in) > $(out)",
+    tool_files: ["generate-operator-out.py"],
     srcs: [
         "compiled_method.h",
         "dex/dex_to_dex_compiler.h",
@@ -250,7 +255,10 @@
             },
         },
     },
-    shared_libs: ["libart"],
+    shared_libs: [
+        "libart",
+        "libart-dexlayout",
+    ],
 }
 
 art_cc_library {
@@ -287,7 +295,10 @@
             },
         },
     },
-    shared_libs: ["libartd"],
+    shared_libs: [
+        "libartd",
+        "libartd-dexlayout",
+    ],
 }
 
 art_cc_library {
@@ -309,6 +320,7 @@
     srcs: [
         "compiled_method_test.cc",
         "debug/dwarf/dwarf_test.cc",
+        "dex/dex_to_dex_decompiler_test.cc",
         "driver/compiled_method_storage_test.cc",
         "driver/compiler_driver_test.cc",
         "elf_writer_test.cc",
@@ -337,6 +349,7 @@
         "optimizing/ssa_test.cc",
         "optimizing/stack_map_test.cc",
         "optimizing/suspend_check_test.cc",
+        "utils/atomic_method_ref_map_test.cc",
         "utils/dedupe_set_test.cc",
         "utils/intrusive_forward_list_test.cc",
         "utils/string_reference_test.cc",
@@ -416,6 +429,7 @@
         },
         mips: {
             srcs: [
+                "optimizing/emit_swap_mips_test.cc",
                 "utils/mips/assembler_mips_test.cc",
                 "utils/mips/assembler_mips32r6_test.cc",
             ],
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 99b0ac1..bbf9eee 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -26,6 +26,7 @@
 #include "base/array_ref.h"
 #include "base/bit_utils.h"
 #include "base/length_prefixed_array.h"
+#include "dex_file_types.h"
 #include "method_reference.h"
 
 namespace art {
@@ -302,9 +303,9 @@
     return target_dex_file_;
   }
 
-  uint32_t TargetTypeIndex() const {
+  dex::TypeIndex TargetTypeIndex() const {
     DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
-    return type_idx_;
+    return dex::TypeIndex(type_idx_);
   }
 
   const DexFile* TargetStringDexFile() const {
@@ -314,11 +315,11 @@
     return target_dex_file_;
   }
 
-  uint32_t TargetStringIndex() const {
+  dex::StringIndex TargetStringIndex() const {
     DCHECK(patch_type_ == Type::kString ||
            patch_type_ == Type::kStringRelative ||
            patch_type_ == Type::kStringBssEntry);
-    return string_idx_;
+    return dex::StringIndex(string_idx_);
   }
 
   const DexFile* TargetDexCacheDexFile() const {
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 0a4f094..30d4b47 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -53,7 +53,7 @@
       uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
       for (uint32_t i = 0; i < parameters_size; ++i) {
         uint32_t id = DecodeUnsignedLeb128P1(&stream);
-        names.push_back(mi->dex_file->StringDataByIdx(id));
+        names.push_back(mi->dex_file->StringDataByIdx(dex::StringIndex(id)));
       }
     }
   }
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 9c1d72b..cf69f46 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -233,6 +233,8 @@
                  << " by replacing it with 2 NOPs at dex pc "
                  << StringPrintf("0x%x", dex_pc) << " in method "
                  << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
+  quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c()));
+  quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c()));
   // We are modifying 4 consecutive bytes.
   inst->SetOpcode(Instruction::NOP);
   inst->SetVRegA_10x(0u);  // keep compliant with verifier.
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 3fad6d4..0a00d45 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -17,8 +17,6 @@
 #ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
 #define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
 
-#include "jni.h"
-
 #include "dex_file.h"
 #include "invoke_type.h"
 
diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc
new file mode 100644
index 0000000..051125e
--- /dev/null
+++ b/compiler/dex/dex_to_dex_decompiler.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_to_dex_decompiler.h"
+
+#include "base/logging.h"
+#include "base/mutex.h"
+#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
+#include "optimizing/bytecode_utils.h"
+
+namespace art {
+namespace optimizer {
+
+class DexDecompiler {
+ public:
+  DexDecompiler(const DexFile::CodeItem& code_item, const ArrayRef<const uint8_t>& quickened_info)
+    : code_item_(code_item),
+      quickened_info_ptr_(quickened_info.data()),
+      quickened_info_end_(quickened_info.data() + quickened_info.size()) {}
+  // Unquickens the code item in place. Returns false if quickening data is left unused.
+  bool Decompile();
+
+ private:
+  void DecompileInstanceFieldAccess(Instruction* inst,
+                                    uint32_t dex_pc,
+                                    Instruction::Code new_opcode) {
+    uint16_t index = GetIndexAt(dex_pc);
+    inst->SetOpcode(new_opcode);
+    inst->SetVRegC_22c(index);  // The recorded index goes back into vC.
+  }
+  // Rewrites `inst` to `new_opcode` and stores the next quickening entry in vB (3rc or 35c).
+  void DecompileInvokeVirtual(Instruction* inst,
+                              uint32_t dex_pc,
+                              Instruction::Code new_opcode,
+                              bool is_range) {
+    uint16_t index = GetIndexAt(dex_pc);
+    inst->SetOpcode(new_opcode);
+    if (is_range) {
+      inst->SetVRegB_3rc(index);
+    } else {
+      inst->SetVRegB_35c(index);
+    }
+  }
+  // A NOP is turned back into a check-cast only if the next quickening entry is for `dex_pc`.
+  void DecompileNop(Instruction* inst, uint32_t dex_pc) {
+    if (quickened_info_ptr_ == quickened_info_end_) {
+      return;
+    }
+    const uint8_t* temporary_pointer = quickened_info_ptr_;  // Peek; nothing is consumed yet.
+    uint32_t quickened_pc = DecodeUnsignedLeb128(&temporary_pointer);
+    if (quickened_pc != dex_pc) {
+      return;
+    }
+    uint16_t reference_index = GetIndexAt(dex_pc);  // First entry: value for vA.
+    uint16_t type_index = GetIndexAt(dex_pc);  // Second entry: value for vB.
+    inst->SetOpcode(Instruction::CHECK_CAST);
+    inst->SetVRegA_21c(reference_index);
+    inst->SetVRegB_21c(type_index);
+  }
+  // Consumes the next (dex pc, index) pair of the quickening data and returns the index.
+  uint16_t GetIndexAt(uint32_t dex_pc) {
+    // Note that as a side effect, DecodeUnsignedLeb128 updates the given pointer
+    // to the new position in the buffer.
+    DCHECK_LT(quickened_info_ptr_, quickened_info_end_);
+    uint32_t quickened_pc = DecodeUnsignedLeb128(&quickened_info_ptr_);
+    DCHECK_LT(quickened_info_ptr_, quickened_info_end_);
+    uint16_t index = DecodeUnsignedLeb128(&quickened_info_ptr_);
+    DCHECK_LE(quickened_info_ptr_, quickened_info_end_);
+    DCHECK_EQ(quickened_pc, dex_pc);
+    return index;
+  }
+
+  const DexFile::CodeItem& code_item_;
+  const uint8_t* quickened_info_ptr_;
+  const uint8_t* const quickened_info_end_;
+
+  DISALLOW_COPY_AND_ASSIGN(DexDecompiler);
+};
+
+bool DexDecompiler::Decompile() {
+  // We need to iterate over the code item, and not over the quickening data,
+  // because the RETURN_VOID quickening is not encoded in the quickening data. Because
+  // unquickening is a rare need and not performance sensitive, it is not worth the
+  // added storage to also add the RETURN_VOID quickening in the quickened data.
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+    Instruction* inst = const_cast<Instruction*>(&it.CurrentInstruction());
+
+    switch (inst->Opcode()) {
+      case Instruction::RETURN_VOID_NO_BARRIER:
+        inst->SetOpcode(Instruction::RETURN_VOID);
+        break;
+
+      case Instruction::NOP:
+        DecompileNop(inst, dex_pc);
+        break;
+
+      case Instruction::IGET_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET);
+        break;
+
+      case Instruction::IGET_WIDE_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE);
+        break;
+
+      case Instruction::IGET_OBJECT_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT);
+        break;
+
+      case Instruction::IGET_BOOLEAN_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN);
+        break;
+
+      case Instruction::IGET_BYTE_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE);
+        break;
+
+      case Instruction::IGET_CHAR_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR);
+        break;
+
+      case Instruction::IGET_SHORT_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT);
+        break;
+
+      case Instruction::IPUT_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT);
+        break;
+
+      case Instruction::IPUT_BOOLEAN_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN);
+        break;
+
+      case Instruction::IPUT_BYTE_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE);
+        break;
+
+      case Instruction::IPUT_CHAR_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR);
+        break;
+
+      case Instruction::IPUT_SHORT_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT);
+        break;
+
+      case Instruction::IPUT_WIDE_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE);
+        break;
+
+      case Instruction::IPUT_OBJECT_QUICK:
+        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT);
+        break;
+
+      case Instruction::INVOKE_VIRTUAL_QUICK:
+        DecompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL, false);
+        break;
+
+      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+        DecompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE, true);
+        break;
+
+      default:
+        break;
+    }
+  }
+  // Stream the addresses as integers: a uint8_t pointer would be printed as a C string.
+  if (quickened_info_ptr_ != quickened_info_end_) {
+    LOG(ERROR) << "Failed to use all values in quickening info."
+               << " Actual: " << std::hex << reinterpret_cast<uintptr_t>(quickened_info_ptr_)
+               << " Expected: " << reinterpret_cast<uintptr_t>(quickened_info_end_);
+    return false;
+  }
+
+  return true;
+}
+
+bool ArtDecompileDEX(const DexFile::CodeItem& code_item,
+                     const ArrayRef<const uint8_t>& quickened_info) {
+  // A single-use decompiler is enough: it rewrites `code_item` and reports the outcome.
+  return DexDecompiler(code_item, quickened_info).Decompile();
+}
+
+}  // namespace optimizer
+}  // namespace art
diff --git a/compiler/dex/dex_to_dex_decompiler.h b/compiler/dex/dex_to_dex_decompiler.h
new file mode 100644
index 0000000..5502ca2
--- /dev/null
+++ b/compiler/dex/dex_to_dex_decompiler.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_
+#define ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_
+
+#include "base/array_ref.h"
+#include "dex_file.h"
+
+namespace art {
+namespace optimizer {
+
+// "Decompile", that is unquicken, `code_item` in place, given the associated quickening
+// data. Returns false if the quickening data was not fully consumed, true otherwise.
+// TODO: code_item isn't really a const element, but changing it
+// to non-const has too many repercussions on the code base. We make it
+// consistent with DexToDexCompiler, but we should really change it to
+// DexFile::CodeItem*.
+bool ArtDecompileDEX(const DexFile::CodeItem& code_item,
+                     const ArrayRef<const uint8_t>& quickened_data);
+
+}  // namespace optimizer
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
new file mode 100644
index 0000000..ea6c7a2
--- /dev/null
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex/dex_to_dex_decompiler.h"
+
+#include "class_linker.h"
+#include "compiler/common_compiler_test.h"
+#include "compiler/compiled_method.h"
+#include "compiler/driver/compiler_options.h"
+#include "compiler/driver/compiler_driver.h"
+#include "compiler_callbacks.h"
+#include "dex_file.h"
+#include "handle_scope-inl.h"
+#include "verifier/method_verifier-inl.h"
+#include "mirror/class_loader.h"
+#include "runtime.h"
+#include "thread.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace art {
+
+class DexToDexDecompilerTest : public CommonCompilerTest {
+ public:
+  void CompileAll(jobject class_loader) REQUIRES(!Locks::mutator_lock_) {
+    TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
+    TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
+    compiler_options_->boot_image_ = false;
+    compiler_options_->SetCompilerFilter(CompilerFilter::kInterpretOnly);
+    compiler_driver_->CompileAll(class_loader,
+                                 GetDexFiles(class_loader),
+                                 /* verifier_deps */ nullptr,
+                                 &timings);
+  }
+
+  void RunTest(const char* dex_name) {
+    Thread* self = Thread::Current();
+    // First load the original dex file.
+    jobject original_class_loader;
+    {
+      ScopedObjectAccess soa(self);
+      original_class_loader = LoadDex(dex_name);
+    }
+    const DexFile* original_dex_file = GetDexFiles(original_class_loader)[0];
+
+    // Load the dex file again and make it writable so that it can be quickened.
+    jobject class_loader;
+    const DexFile* updated_dex_file = nullptr;
+    {
+      ScopedObjectAccess soa(self);
+      class_loader = LoadDex(dex_name);
+      updated_dex_file = GetDexFiles(class_loader)[0];
+      Runtime::Current()->GetClassLinker()->RegisterDexFile(
+          *updated_dex_file, soa.Decode<mirror::ClassLoader>(class_loader).Ptr());
+    }
+    // The dex files should be identical.
+    int cmp = memcmp(original_dex_file->Begin(),
+                     updated_dex_file->Begin(),
+                     updated_dex_file->Size());
+    ASSERT_EQ(0, cmp);
+
+    updated_dex_file->EnableWrite();
+    CompileAll(class_loader);
+    // The dex files should be different after quickening.
+    cmp = memcmp(original_dex_file->Begin(), updated_dex_file->Begin(), updated_dex_file->Size());
+    ASSERT_NE(0, cmp);
+
+    // Unquicken the dex file.
+    for (uint32_t i = 0; i < updated_dex_file->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = updated_dex_file->GetClassDef(i);
+      const uint8_t* class_data = updated_dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        continue;
+      }
+      ClassDataItemIterator it(*updated_dex_file, class_data);
+      // Skip fields
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      // Unquicken each method.
+      while (it.HasNextDirectMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        CompiledMethod* compiled_method =
+            compiler_driver_->GetCompiledMethod(MethodReference(updated_dex_file, method_idx));
+        ArrayRef<const uint8_t> table;
+        if (compiled_method != nullptr) {
+          table = compiled_method->GetVmapTable();
+        }
+        optimizer::ArtDecompileDEX(*it.GetMethodCodeItem(), table);
+        it.Next();
+      }
+      while (it.HasNextVirtualMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        CompiledMethod* compiled_method =
+            compiler_driver_->GetCompiledMethod(MethodReference(updated_dex_file, method_idx));
+        ArrayRef<const uint8_t> table;
+        if (compiled_method != nullptr) {
+          table = compiled_method->GetVmapTable();
+        }
+        optimizer::ArtDecompileDEX(*it.GetMethodCodeItem(), table);
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+
+    // Make sure after unquickening we go back to the same contents as the original dex file.
+    cmp = memcmp(original_dex_file->Begin(), updated_dex_file->Begin(), updated_dex_file->Size());
+    ASSERT_EQ(0, cmp);
+  }
+};
+
+TEST_F(DexToDexDecompilerTest, VerifierDeps) {
+  RunTest("VerifierDeps");
+}
+
+TEST_F(DexToDexDecompilerTest, DexToDexDecompiler) {
+  RunTest("DexToDexDecompiler");
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 34fd88b..db0fdaa 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_
 
 #include "compiler_callbacks.h"
+#include "verifier/verifier_deps.h"
 
 namespace art {
 
@@ -46,16 +47,16 @@
     }
 
     verifier::VerifierDeps* GetVerifierDeps() const OVERRIDE {
-      return verifier_deps_;
+      return verifier_deps_.get();
     }
 
-    void SetVerifierDeps(verifier::VerifierDeps* deps) {
-      verifier_deps_ = deps;
+    void SetVerifierDeps(verifier::VerifierDeps* deps) OVERRIDE {
+      verifier_deps_.reset(deps);
     }
 
   private:
     VerificationResults* const verification_results_;
-    verifier::VerifierDeps* verifier_deps_;
+    std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 511a787..669d8cd 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -23,6 +23,7 @@
 #include "driver/compiler_options.h"
 #include "thread.h"
 #include "thread-inl.h"
+#include "utils/atomic_method_ref_map-inl.h"
 #include "verified_method.h"
 #include "verifier/method_verifier-inl.h"
 
@@ -31,49 +32,72 @@
 VerificationResults::VerificationResults(const CompilerOptions* compiler_options)
     : compiler_options_(compiler_options),
       verified_methods_lock_("compiler verified methods lock"),
-      verified_methods_(),
-      rejected_classes_lock_("compiler rejected classes lock"),
-      rejected_classes_() {
-}
+      rejected_classes_lock_("compiler rejected classes lock") {}
 
 VerificationResults::~VerificationResults() {
-  Thread* self = Thread::Current();
-  {
-    WriterMutexLock mu(self, verified_methods_lock_);
-    STLDeleteValues(&verified_methods_);
-  }
+  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+  STLDeleteValues(&verified_methods_);
+  atomic_verified_methods_.Visit([](const MethodReference& ref ATTRIBUTE_UNUSED,
+                                    const VerifiedMethod* method) {
+    delete method;
+  });
 }
 
 void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
   DCHECK(method_verifier != nullptr);
   MethodReference ref = method_verifier->GetMethodReference();
   bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
-  const VerifiedMethod* verified_method = VerifiedMethod::Create(method_verifier, compile);
+  std::unique_ptr<const VerifiedMethod> verified_method(
+      VerifiedMethod::Create(method_verifier, compile));
   if (verified_method == nullptr) {
     // We'll punt this later.
     return;
   }
-
-  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
-  auto it = verified_methods_.find(ref);
-  if (it != verified_methods_.end()) {
+  AtomicMap::InsertResult result = atomic_verified_methods_.Insert(ref,
+                                                                   /*expected*/ nullptr,
+                                                                   verified_method.get());
+  const VerifiedMethod* existing = nullptr;
+  bool inserted;
+  if (result != AtomicMap::kInsertResultInvalidDexFile) {
+    inserted = (result == AtomicMap::kInsertResultSuccess);
+    if (!inserted) {
+      // Rare case.
+      CHECK(atomic_verified_methods_.Get(ref, &existing));
+      CHECK_NE(verified_method.get(), existing);
+    }
+  } else {
+    WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+    auto it = verified_methods_.find(ref);
+    inserted = it == verified_methods_.end();
+    if (inserted) {
+      verified_methods_.Put(ref, verified_method.get());
+      DCHECK(verified_methods_.find(ref) != verified_methods_.end());
+    } else {
+      existing = it->second;
+    }
+  }
+  if (inserted) {
+    // Successfully added, release the unique_ptr since we no longer have ownership.
+    DCHECK_EQ(GetVerifiedMethod(ref), verified_method.get());
+    verified_method.release();
+  } else {
     // TODO: Investigate why are we doing the work again for this method and try to avoid it.
     LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod();
     if (!Runtime::Current()->UseJitCompilation()) {
-      DCHECK_EQ(it->second->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
-      DCHECK_EQ(it->second->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
+      DCHECK_EQ(existing->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
+      DCHECK_EQ(existing->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
     }
-    // Delete the new verified method since there was already an existing one registered. It
-    // is unsafe to replace the existing one since the JIT may be using it to generate a
-    // native GC map.
-    delete verified_method;
-    return;
+    // Let the unique_ptr delete the new verified method since there was already an existing one
+    // registered. It is unsafe to replace the existing one since the JIT may be using it to
+    // generate a native GC map.
   }
-  verified_methods_.Put(ref, verified_method);
-  DCHECK(verified_methods_.find(ref) != verified_methods_.end());
 }
 
 const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
+  const VerifiedMethod* ret = nullptr;
+  if (atomic_verified_methods_.Get(ref, &ret)) {
+    return ret;
+  }
   ReaderMutexLock mu(Thread::Current(), verified_methods_lock_);
   auto it = verified_methods_.find(ref);
   return (it != verified_methods_.end()) ? it->second : nullptr;
@@ -105,4 +129,22 @@
   return true;
 }
 
+void VerificationResults::AddDexFile(const DexFile* dex_file) {
+  atomic_verified_methods_.AddDexFile(dex_file);
+  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+  // There can be some verified methods that are already registered for the dex_file since we set
+  // up well known classes earlier. Move these from verified_methods_ into the atomic map so that
+  // we don't accidentally miss seeing them.
+  for (auto it = verified_methods_.begin(); it != verified_methods_.end(); ) {
+    MethodReference ref = it->first;
+    if (ref.dex_file == dex_file) {
+      CHECK(atomic_verified_methods_.Insert(ref, nullptr, it->second) ==
+          AtomicMap::kInsertResultSuccess);
+      it = verified_methods_.erase(it);  // Continue from the iterator returned by erase().
+    } else {
+      ++it;
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 6afd1ab..ea38f4d 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -19,13 +19,14 @@
 
 #include <stdint.h>
 #include <set>
-#include <vector>
 
+#include "base/dchecked_vector.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "class_reference.h"
 #include "method_reference.h"
 #include "safe_map.h"
+#include "utils/atomic_method_ref_map.h"
 
 namespace art {
 
@@ -38,35 +39,44 @@
 
 // Used by CompilerCallbacks to track verification information from the Runtime.
 class VerificationResults {
-  public:
-    explicit VerificationResults(const CompilerOptions* compiler_options);
-    ~VerificationResults();
+ public:
+  explicit VerificationResults(const CompilerOptions* compiler_options);
+  ~VerificationResults();
 
-    void ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
-        REQUIRES_SHARED(Locks::mutator_lock_)
-        REQUIRES(!verified_methods_lock_);
+  void ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!verified_methods_lock_);
 
-    const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
-        REQUIRES(!verified_methods_lock_);
+  const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
+      REQUIRES(!verified_methods_lock_);
 
-    void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_);
-    bool IsClassRejected(ClassReference ref) REQUIRES(!rejected_classes_lock_);
+  void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_);
+  bool IsClassRejected(ClassReference ref) REQUIRES(!rejected_classes_lock_);
 
-    bool IsCandidateForCompilation(MethodReference& method_ref,
-                                   const uint32_t access_flags);
+  bool IsCandidateForCompilation(MethodReference& method_ref, const uint32_t access_flags);
 
-  private:
-    const CompilerOptions* const compiler_options_;
+  // Add a dex file to enable using the atomic map.
+  void AddDexFile(const DexFile* dex_file) REQUIRES(!verified_methods_lock_);
 
-    // Verified methods.
-    typedef SafeMap<MethodReference, const VerifiedMethod*,
-        MethodReferenceComparator> VerifiedMethodMap;
-    ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    VerifiedMethodMap verified_methods_ GUARDED_BY(verified_methods_lock_);
+ private:
+  // Verified methods. The method array is fixed to avoid needing a lock to extend it.
+  using AtomicMap = AtomicMethodRefMap<const VerifiedMethod*>;
+  using VerifiedMethodMap = SafeMap<MethodReference,
+                                    const VerifiedMethod*,
+                                    MethodReferenceComparator>;
 
-    // Rejected classes.
-    ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_);
+  VerifiedMethodMap verified_methods_ GUARDED_BY(verified_methods_lock_);
+  const CompilerOptions* const compiler_options_;
+
+  // Dex2oat can add dex files to atomic_verified_methods_ to avoid locking when calling
+  // GetVerifiedMethod.
+  AtomicMap atomic_verified_methods_;
+
+  ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // Rejected classes.
+  ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_);
 };
 
 }  // namespace art
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index e19fb7b..1bdace9 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -230,7 +230,7 @@
         const verifier::RegType& reg_type(line->GetRegisterType(method_verifier,
                                                                 inst->VRegA_21c()));
         const verifier::RegType& cast_type =
-            method_verifier->ResolveCheckedClass(inst->VRegB_21c());
+            method_verifier->ResolveCheckedClass(dex::TypeIndex(inst->VRegB_21c()));
         is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type, method_verifier);
       } else {
         const verifier::RegType& array_type(line->GetRegisterType(method_verifier,
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 9711516..f056dd3 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -38,7 +38,7 @@
 
 inline mirror::Class* CompilerDriver::ResolveClass(
     const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-    Handle<mirror::ClassLoader> class_loader, uint16_t cls_index,
+    Handle<mirror::ClassLoader> class_loader, dex::TypeIndex cls_index,
     const DexCompilationUnit* mUnit) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit));
@@ -141,7 +141,7 @@
     mirror::Class* referrer_class,
     ArtMember* resolved_member,
     uint16_t member_idx,
-    uint32_t* storage_index) {
+    dex::TypeIndex* storage_index) {
   DCHECK(resolved_member->IsStatic());
   if (LIKELY(referrer_class != nullptr)) {
     ObjPtr<mirror::Class> members_class = resolved_member->GetDeclaringClass();
@@ -156,7 +156,7 @@
       // TODO: for images we can elide the static storage base null check
       // if we know there's a non-null entry in the image
       const DexFile* dex_file = dex_cache->GetDexFile();
-      uint32_t storage_idx = DexFile::kDexNoIndex;
+      dex::TypeIndex storage_idx(DexFile::kDexNoIndex16);
       if (LIKELY(members_class->GetDexCache() == dex_cache)) {
         // common case where the dex cache of both the referrer and the member are the same,
         // no need to search the dex file
@@ -166,27 +166,27 @@
         // of the class mentioned in the dex file and there is no dex cache entry.
         storage_idx = resolved_member->GetDeclaringClass()->FindTypeIndexInOtherDexFile(*dex_file);
       }
-      if (storage_idx != DexFile::kDexNoIndex) {
+      if (storage_idx.IsValid()) {
         *storage_index = storage_idx;
         return std::make_pair(true, !resolved_member->IsFinal());
       }
     }
   }
   // Conservative defaults.
-  *storage_index = DexFile::kDexNoIndex;
+  *storage_index = dex::TypeIndex(DexFile::kDexNoIndex16);
   return std::make_pair(false, false);
 }
 
 inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
+    ArtField* resolved_field, uint16_t field_idx, dex::TypeIndex* storage_index) {
   return IsClassOfStaticMemberAvailableToReferrer(
       dex_cache, referrer_class, resolved_field, field_idx, storage_index);
 }
 
 inline bool CompilerDriver::IsClassOfStaticMethodAvailableToReferrer(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    ArtMethod* resolved_method, uint16_t method_idx, uint32_t* storage_index) {
+    ArtMethod* resolved_method, uint16_t method_idx, dex::TypeIndex* storage_index) {
   std::pair<bool, bool> result = IsClassOfStaticMemberAvailableToReferrer(
       dex_cache, referrer_class, resolved_method, method_idx, storage_index);
   // Only the first member of `result` is meaningful, as there is no
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 1b87725..6b62110 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -39,11 +39,13 @@
 #include "compiled_class.h"
 #include "compiled_method.h"
 #include "compiler.h"
+#include "compiler_callbacks.h"
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
+#include "dex/dex_to_dex_decompiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_options.h"
@@ -69,8 +71,10 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
+#include "utils/atomic_method_ref_map-inl.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "utils/swap_space.h"
+#include "vdex_file.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 #include "verifier/verifier_log_mode.h"
@@ -284,8 +288,6 @@
       instruction_set_features_(instruction_set_features),
       requires_constructor_barrier_lock_("constructor barrier lock"),
       compiled_classes_lock_("compiled classes lock"),
-      compiled_methods_lock_("compiled method lock"),
-      compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
@@ -323,12 +325,12 @@
     MutexLock mu(self, compiled_classes_lock_);
     STLDeleteValues(&compiled_classes_);
   }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
-    for (auto& pair : compiled_methods_) {
-      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, pair.second);
+  compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED,
+                                 CompiledMethod* method) {
+    if (method != nullptr) {
+      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, method);
     }
-  }
+  });
   compiler_->UnInit();
 }
 
@@ -431,6 +433,72 @@
   FreeThreadPools();
 }
 
+// In-place unquicken the given `dex_files` based on `quickening_info`.
+// `quickening_info` is consumed front to back: each method with a code item (direct methods
+// before virtual ones, classes in class-def order) owns a uint32_t size plus that many bytes.
+static void Unquicken(const std::vector<const DexFile*>& dex_files,
+                      const ArrayRef<const uint8_t>& quickening_info) {
+  const uint8_t* quickening_info_ptr = quickening_info.data();
+  const uint8_t* const quickening_info_end = quickening_info.data() + quickening_info.size();
+  // Unquickens the method `it` points at (if it has code) and advances `it`.
+  auto unquicken_method = [&](ClassDataItemIterator* it) {
+    const DexFile::CodeItem* code_item = it->GetMethodCodeItem();
+    if (code_item != nullptr) {
+      size_t remaining = static_cast<size_t>(quickening_info_end - quickening_info_ptr);
+      DCHECK_LE(sizeof(uint32_t), remaining);
+      uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
+      quickening_info_ptr += sizeof(uint32_t);
+      DCHECK_LE(quickening_size, remaining - sizeof(uint32_t));
+      optimizer::ArtDecompileDEX(
+          *code_item, ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size));
+      quickening_info_ptr += quickening_size;
+    }
+    it->Next();
+  };
+  for (const DexFile* dex_file : dex_files) {
+    for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        continue;
+      }
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      // Unquicken each method.
+      while (it.HasNextDirectMethod()) {
+        unquicken_method(&it);
+      }
+      while (it.HasNextVirtualMethod()) {
+        unquicken_method(&it);
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+  DCHECK_EQ(quickening_info_ptr, quickening_info_end) << "Failed to use all quickening info";
+}
+
+// Compiles `dex_files`; a non-null `vdex_file` first supplies quickening info and verifier deps.
+void CompilerDriver::CompileAll(jobject class_loader,
+                                const std::vector<const DexFile*>& dex_files,
+                                VdexFile* vdex_file,
+                                TimingLogger* timings) {
+  if (vdex_file != nullptr) {
+    // TODO: we unquicken unconditionally, as we don't know if the boot image has changed.
+    // How exactly we'll know is under experimentation.
+    Unquicken(dex_files, vdex_file->GetQuickeningInfo());
+    Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
+        new verifier::VerifierDeps(dex_files, vdex_file->GetVerifierDepsData()));
+  }
+  CompileAll(class_loader, dex_files, timings);
+}
+
 static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
     Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader,
     const DexFile& dex_file, const DexFile::ClassDef& class_def)
@@ -506,8 +574,7 @@
                           const DexFile& dex_file,
                           optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level,
                           bool compilation_enabled,
-                          Handle<mirror::DexCache> dex_cache)
-    REQUIRES(!driver->compiled_methods_lock_) {
+                          Handle<mirror::DexCache> dex_cache) {
   DCHECK(driver != nullptr);
   CompiledMethod* compiled_method = nullptr;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
@@ -536,14 +603,9 @@
               : optimizer::DexToDexCompilationLevel::kRequired);
     }
   } else if ((access_flags & kAccNative) != 0) {
-    const InstructionSet instruction_set = driver->GetInstructionSet();
-    const bool use_generic_jni =
-        // Are we extracting only and have support for generic JNI down calls?
-        (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
-             InstructionSetHasGenericJniStub(instruction_set)) ||
-        // Always punt to generic JNI for MIPS because of no support for @CriticalNative. b/31743474
-        (instruction_set == kMips || instruction_set == kMips64);
-    if (use_generic_jni) {
+    // Are we extracting only and have support for generic JNI down calls?
+    if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
+        InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
       // Leaving this empty will trigger the generic JNI version
     } else {
       // Look-up the ArtMethod associated with this code_item (if any)
@@ -778,9 +840,9 @@
     switch (inst->Opcode()) {
       case Instruction::CONST_STRING:
       case Instruction::CONST_STRING_JUMBO: {
-        uint32_t string_index = (inst->Opcode() == Instruction::CONST_STRING)
+        dex::StringIndex string_index((inst->Opcode() == Instruction::CONST_STRING)
             ? inst->VRegB_21c()
-            : inst->VRegB_31c();
+            : inst->VRegB_31c());
         mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
         CHECK(string != nullptr) << "Could not allocate a string when forcing determinism";
         break;
@@ -876,6 +938,13 @@
                                 TimingLogger* timings) {
   CheckThreadPools();
 
+  for (const DexFile* dex_file : dex_files) {
+    // Can be already inserted if the caller is CompileOne. This happens for gtests.
+    if (!compiled_methods_.HaveDexFile(dex_file)) {
+      compiled_methods_.AddDexFile(dex_file);
+    }
+  }
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
@@ -970,11 +1039,12 @@
     return true;
   }
   DCHECK(profile_compilation_info_ != nullptr);
-  bool result = profile_compilation_info_->ContainsClass(dex_file, class_idx);
+  const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_idx);
+  dex::TypeIndex type_idx = class_def.class_idx_;
+  bool result = profile_compilation_info_->ContainsClass(dex_file, type_idx);
   if (kDebugProfileGuidedCompilation) {
-    LOG(INFO) << "[ProfileGuidedCompilation] "
-        << (result ? "Verified" : "Skipped") << " method:"
-        << dex_file.GetClassDescriptor(dex_file.GetClassDef(class_idx));
+    LOG(INFO) << "[ProfileGuidedCompilation] " << (result ? "Verified" : "Skipped") << " method:"
+        << dex_file.GetClassDescriptor(class_def);
   }
   return result;
 }
@@ -982,7 +1052,7 @@
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
   explicit ResolveCatchBlockExceptionsClassVisitor(
-      std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
+      std::set<std::pair<dex::TypeIndex, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
   virtual bool operator()(ObjPtr<mirror::Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1013,8 +1083,8 @@
         has_catch_all = true;
       }
       for (int32_t j = 0; j < encoded_catch_handler_size; j++) {
-        uint16_t encoded_catch_handler_handlers_type_idx =
-            DecodeUnsignedLeb128(&encoded_catch_handler_list);
+        dex::TypeIndex encoded_catch_handler_handlers_type_idx =
+            dex::TypeIndex(DecodeUnsignedLeb128(&encoded_catch_handler_list));
         // Add to set of types to resolve if not already in the dex cache resolved types
         if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx,
                                               pointer_size)) {
@@ -1031,7 +1101,7 @@
     }
   }
 
-  std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve_;
+  std::set<std::pair<dex::TypeIndex, const DexFile*>>& exceptions_to_resolve_;
 };
 
 class RecordImageClassesVisitor : public ClassVisitor {
@@ -1079,7 +1149,7 @@
   // Resolve exception classes referenced by the loaded classes. The catch logic assumes
   // exceptions are resolved by the verifier when there is a catch block in an interested method.
   // Do this here so that exception classes appear to have been specified image classes.
-  std::set<std::pair<uint16_t, const DexFile*>> unresolved_exception_types;
+  std::set<std::pair<dex::TypeIndex, const DexFile*>> unresolved_exception_types;
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> java_lang_Throwable(
       hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;")));
@@ -1087,8 +1157,8 @@
     unresolved_exception_types.clear();
     ResolveCatchBlockExceptionsClassVisitor visitor(unresolved_exception_types);
     class_linker->VisitClasses(&visitor);
-    for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) {
-      uint16_t exception_type_idx = exception_type.first;
+    for (const auto& exception_type : unresolved_exception_types) {
+      dex::TypeIndex exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
       StackHandleScope<2> hs2(self);
       Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file,
@@ -1339,7 +1409,7 @@
 
 bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
                                                 Handle<mirror::DexCache> dex_cache,
-                                                uint32_t type_idx) {
+                                                dex::TypeIndex type_idx) {
   // Get type from dex cache assuming it was populated by the verifier
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1368,7 +1438,7 @@
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
                                                             Handle<mirror::DexCache> dex_cache,
-                                                            uint32_t type_idx,
+                                                            dex::TypeIndex type_idx,
                                                             bool* finalizable) {
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
@@ -1522,7 +1592,7 @@
 
   if (!use_dex_cache) {
     bool method_in_image = false;
-    const std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+    const std::vector<gc::space::ImageSpace*>& image_spaces = heap->GetBootImageSpaces();
     for (gc::space::ImageSpace* image_space : image_spaces) {
       const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
       if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
@@ -1862,7 +1932,7 @@
  public:
   explicit ResolveTypeVisitor(const ParallelCompilationManager* manager) : manager_(manager) {
   }
-  virtual void Visit(size_t type_idx) OVERRIDE REQUIRES(!Locks::mutator_lock_) {
+  void Visit(size_t type_idx) OVERRIDE REQUIRES(!Locks::mutator_lock_) {
   // Class derived values are more complicated, they require the linker and loader.
     ScopedObjectAccess soa(Thread::Current());
     ClassLinker* class_linker = manager_->GetClassLinker();
@@ -1873,7 +1943,10 @@
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
         dex_file,
         class_loader.Get())));
-    mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
+    mirror::Class* klass = class_linker->ResolveType(dex_file,
+                                                     dex::TypeIndex(type_idx),
+                                                     dex_cache,
+                                                     class_loader);
 
     if (klass == nullptr) {
       soa.Self()->AssertPendingException();
@@ -1932,21 +2005,87 @@
   }
 }
 
+// Verifies `dex_files`; skips full verification when the callbacks' VerifierDeps still validate.
-void CompilerDriver::Verify(jobject class_loader,
+void CompilerDriver::Verify(jobject jclass_loader,
                             const std::vector<const DexFile*>& dex_files,
                             TimingLogger* timings) {
+  verifier::VerifierDeps* verifier_deps =
+      Runtime::Current()->GetCompilerCallbacks()->GetVerifierDeps();
+  // If there is an existing `VerifierDeps`, try to use it for fast verification.
+  if (verifier_deps != nullptr) {
+    TimingLogger::ScopedTiming t("Fast Verify", timings);
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
+    MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    if (verifier_deps->ValidateDependencies(class_loader, soa.Self())) {
+      // We successfully validated the dependencies, now update class status of verified classes.
+      // Note that the dependencies also record which classes could not be fully verified; we
+      // could try again, but that would hurt verification time. So instead we assume these
+      // classes still need to be verified at runtime.
+      for (const DexFile* dex_file : dex_files) {
+        // Fetch the list of unverified classes and turn it into a set for faster
+        // lookups.
+        const std::vector<dex::TypeIndex>& unverified_classes =
+            verifier_deps->GetUnverifiedClasses(*dex_file);
+        std::set<dex::TypeIndex> set(unverified_classes.begin(), unverified_classes.end());
+        for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+          const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+          const char* descriptor = dex_file->GetClassDescriptor(class_def);
+          cls.Assign(class_linker->FindClass(soa.Self(), descriptor, class_loader));
+          if (cls.Get() == nullptr) {
+            CHECK(soa.Self()->IsExceptionPending());
+            soa.Self()->ClearException();
+          } else if (set.find(class_def.class_idx_) == set.end()) {
+            ObjectLock<mirror::Class> lock(soa.Self(), cls);
+            mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self());
+          }
+        }
+      }
+      return;
+    }
+  }
+
+  // If there is no existing `verifier_deps` (because of non-existing vdex), or
+  // the existing `verifier_deps` is not valid anymore, create a new one for
+  // non boot image compilation. The verifier will need it to record the new dependencies.
+  // Then dex2oat can update the vdex file with these new dependencies.
+  if (!GetCompilerOptions().IsBootImage()) {
+    // Create the main VerifierDeps, and set it to this thread.
+    verifier_deps = new verifier::VerifierDeps(dex_files);
+    Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(verifier_deps);
+    Thread::Current()->SetVerifierDeps(verifier_deps);
+    // Create per-thread VerifierDeps to avoid contention on the main one.
+    // We will merge them after verification.
+    for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) {
+      worker->GetThread()->SetVerifierDeps(new verifier::VerifierDeps(dex_files));
+    }
+  }
   // Note: verification should not be pulling in classes anymore when compiling the boot image,
   //       as all should have been resolved before. As such, doing this in parallel should still
   //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader,
+    VerifyDexFile(jclass_loader,
                   *dex_file,
                   dex_files,
                   parallel_thread_pool_.get(),
                   parallel_thread_count_,
                   timings);
   }
+
+  if (!GetCompilerOptions().IsBootImage()) {
+    // Merge all VerifierDeps into the main one.
+    for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) {
+      verifier::VerifierDeps* thread_deps = worker->GetThread()->GetVerifierDeps();
+      worker->GetThread()->SetVerifierDeps(nullptr);
+      verifier_deps->MergeWith(*thread_deps, dex_files);
+      delete thread_deps;
+    }
+    Thread::Current()->SetVerifierDeps(nullptr);
+  }
 }
 
 class VerifyClassVisitor : public CompilationVisitor {
@@ -2482,30 +2621,15 @@
                                        size_t non_relative_linker_patch_count) {
   DCHECK(GetCompiledMethod(method_ref) == nullptr)
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
-  {
-    MutexLock mu(Thread::Current(), compiled_methods_lock_);
-    compiled_methods_.Put(method_ref, compiled_method);
-    non_relative_linker_patch_count_ += non_relative_linker_patch_count;
-  }
+  MethodTable::InsertResult result = compiled_methods_.Insert(method_ref,
+                                                              /*expected*/ nullptr,
+                                                              compiled_method);
+  CHECK(result == MethodTable::kInsertResultSuccess);
+  non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count);
   DCHECK(GetCompiledMethod(method_ref) != nullptr)
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
 }
 
-void CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) {
-  CompiledMethod* compiled_method = nullptr;
-  {
-    MutexLock mu(Thread::Current(), compiled_methods_lock_);
-    auto it = compiled_methods_.find(method_ref);
-    if (it != compiled_methods_.end()) {
-      compiled_method = it->second;
-      compiled_methods_.erase(it);
-    }
-  }
-  if (compiled_method != nullptr) {
-    CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, compiled_method);
-  }
-}
-
 CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const {
   MutexLock mu(Thread::Current(), compiled_classes_lock_);
   ClassTable::const_iterator it = compiled_classes_.find(ref);
@@ -2544,13 +2668,9 @@
 }
 
 CompiledMethod* CompilerDriver::GetCompiledMethod(MethodReference ref) const {
-  MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  MethodTable::const_iterator it = compiled_methods_.find(ref);
-  if (it == compiled_methods_.end()) {
-    return nullptr;
-  }
-  CHECK(it->second != nullptr);
-  return it->second;
+  CompiledMethod* compiled_method = nullptr;
+  compiled_methods_.Get(ref, &compiled_method);
+  return compiled_method;
 }
 
 bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx,
@@ -2579,8 +2699,7 @@
 }
 
 size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const {
-  MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  return non_relative_linker_patch_count_;
+  return non_relative_linker_patch_count_.LoadRelaxed();
 }
 
 void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 4a48f9c..cc50197 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -31,6 +31,7 @@
 #include "class_reference.h"
 #include "compiler.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "driver/compiled_method_storage.h"
 #include "jit/offline_profiling_info.h"
 #include "invoke_type.h"
@@ -40,6 +41,7 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
+#include "utils/atomic_method_ref_map.h"
 #include "utils/dex_cache_arrays_layout.h"
 
 namespace art {
@@ -67,6 +69,7 @@
 using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>;
 template<class T> class Handle;
 class TimingLogger;
+class VdexFile;
 class VerificationResults;
 class VerifiedMethod;
 
@@ -120,10 +123,16 @@
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
+  void CompileAll(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  VdexFile* vdex_file,
+                  TimingLogger* timings)
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
+
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
+      REQUIRES(!compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   VerificationResults* GetVerificationResults() const {
     DCHECK(Runtime::Current()->IsAotCompiler());
@@ -160,18 +169,12 @@
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
 
-  CompiledMethod* GetCompiledMethod(MethodReference ref) const
-      REQUIRES(!compiled_methods_lock_);
-  size_t GetNonRelativeLinkerPatchCount() const
-      REQUIRES(!compiled_methods_lock_);
-
+  CompiledMethod* GetCompiledMethod(MethodReference ref) const;
+  size_t GetNonRelativeLinkerPatchCount() const;
   // Add a compiled method.
   void AddCompiledMethod(const MethodReference& method_ref,
                          CompiledMethod* const compiled_method,
-                         size_t non_relative_linker_patch_count)
-      REQUIRES(!compiled_methods_lock_);
-  // Remove and delete a compiled method.
-  void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_);
+                         size_t non_relative_linker_patch_count);
 
   void SetRequiresConstructorBarrier(Thread* self,
                                      const DexFile* dex_file,
@@ -186,14 +189,14 @@
   // Are runtime access checks necessary in the compiled code?
   bool CanAccessTypeWithoutChecks(uint32_t referrer_idx,
                                   Handle<mirror::DexCache> dex_cache,
-                                  uint32_t type_idx)
+                                  dex::TypeIndex type_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
   // out_is_finalizable is set to whether the type is finalizable.
   bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
                                               Handle<mirror::DexCache> dex_cache,
-                                              uint32_t type_idx,
+                                              dex::TypeIndex type_idx,
                                               bool* out_is_finalizable)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -205,7 +208,7 @@
 
   mirror::Class* ResolveClass(
       const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-      Handle<mirror::ClassLoader> class_loader, uint16_t type_index,
+      Handle<mirror::ClassLoader> class_loader, dex::TypeIndex type_index,
       const DexCompilationUnit* mUnit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -232,9 +235,11 @@
 
   // Can we fast-path an SGET/SPUT access to a static field? If yes, compute the type index
   // of the declaring class in the referrer's dex file.
-  std::pair<bool, bool> IsFastStaticField(
-      mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-      ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index)
+  std::pair<bool, bool> IsFastStaticField(mirror::DexCache* dex_cache,
+                                          mirror::Class* referrer_class,
+                                          ArtField* resolved_field,
+                                          uint16_t field_idx,
+                                          dex::TypeIndex* storage_index)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Return whether the declaring class of `resolved_method` is
@@ -246,7 +251,7 @@
                                                 mirror::Class* referrer_class,
                                                 ArtMethod* resolved_method,
                                                 uint16_t method_idx,
-                                                uint32_t* storage_index)
+                                                dex::TypeIndex* storage_index)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a method. Returns null on failure, including incompatible class change.
@@ -393,7 +398,7 @@
                                                                  mirror::Class* referrer_class,
                                                                  ArtMember* resolved_member,
                                                                  uint16_t member_idx,
-                                                                 uint32_t* storage_index)
+                                                                 dex::TypeIndex* storage_index)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Can `referrer_class` access the resolved `member`?
@@ -438,6 +443,7 @@
   void Verify(jobject class_loader,
               const std::vector<const DexFile*>& dex_files,
               TimingLogger* timings);
+
   void VerifyDexFile(jobject class_loader,
                      const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
@@ -508,18 +514,15 @@
   mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
 
-  typedef SafeMap<const MethodReference, CompiledMethod*, MethodReferenceComparator> MethodTable;
-
- public:
-  // Lock is public so that non-members can have lock annotations.
-  mutable Mutex compiled_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  typedef AtomicMethodRefMap<CompiledMethod*> MethodTable;
 
  private:
   // All method references that this compiler has compiled.
-  MethodTable compiled_methods_ GUARDED_BY(compiled_methods_lock_);
+  MethodTable compiled_methods_;
+
   // Number of non-relative patches in all compiled methods. These patches need space
   // in the .oat_patches ELF section if requested in the compiler options.
-  size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
+  Atomic<size_t> non_relative_linker_patch_count_;
 
   // If image_ is true, specifies the classes that will be included in the image.
   // Note if image_classes_ is null, all classes are included in the image.
@@ -579,6 +582,7 @@
   const BitVector* current_dex_to_dex_methods_;
 
   friend class CompileClassVisitor;
+  friend class DexToDexDecompilerTest;
   friend class verifier::VerifierDepsTest;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 845028d..12684c0 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -24,6 +24,7 @@
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -43,6 +44,7 @@
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_driver_->CompileAll(class_loader,
                                  GetDexFiles(class_loader),
+                                 /* vdex_file */ nullptr,
                                  &timings);
     t.NewTiming("MakeAllExecutable");
     MakeAllExecutable(class_loader);
@@ -109,14 +111,14 @@
   ObjPtr<mirror::DexCache> dex_cache = class_linker_->FindDexCache(soa.Self(), dex);
   EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings());
   for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
-    const mirror::String* string = dex_cache->GetResolvedString(i);
+    const mirror::String* string = dex_cache->GetResolvedString(dex::StringIndex(i));
     EXPECT_TRUE(string != nullptr) << "string_idx=" << i;
   }
   EXPECT_EQ(dex.NumTypeIds(), dex_cache->NumResolvedTypes());
   for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-    mirror::Class* type = dex_cache->GetResolvedType(i);
+    mirror::Class* type = dex_cache->GetResolvedType(dex::TypeIndex(i));
     EXPECT_TRUE(type != nullptr) << "type_idx=" << i
-                              << " " << dex.GetTypeDescriptor(dex.GetTypeId(i));
+                              << " " << dex.GetTypeDescriptor(dex.GetTypeId(dex::TypeIndex(i)));
   }
   EXPECT_EQ(dex.NumMethodIds(), dex_cache->NumResolvedMethods());
   auto* cl = Runtime::Current()->GetClassLinker();
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 56b632d..9c62f80 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -341,6 +341,7 @@
   const std::vector<std::string>* passes_to_run_;
 
   friend class Dex2Oat;
+  friend class DexToDexDecompilerTest;
   friend class CommonCompilerTest;
   friend class verifier::VerifierDepsTest;
 
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 31a7529..7c02384 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -879,7 +879,7 @@
     elf_header.e_ident[EI_MAG2]       = ELFMAG2;
     elf_header.e_ident[EI_MAG3]       = ELFMAG3;
     elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
-                                         ? ELFCLASS32 : ELFCLASS64;;
+                                         ? ELFCLASS32 : ELFCLASS64;
     elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
     elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
     elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
diff --git a/compiler/generate-operator-out.py b/compiler/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/compiler/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 8fdf6fc..5629dff 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -189,7 +189,7 @@
       TimingLogger timings("ImageTest::WriteRead", false, false);
       TimingLogger::ScopedTiming t("CompileAll", &timings);
       driver->SetDexFilesForOatFile(class_path);
-      driver->CompileAll(class_loader, class_path, &timings);
+      driver->CompileAll(class_loader, class_path, /* vdex_file */ nullptr, &timings);
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
@@ -211,7 +211,9 @@
                                                       &driver->GetCompilerOptions(),
                                                       oat_file.GetFile()));
         elf_writers.back()->Start();
-        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true, &timings));
+        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
+                                               &timings,
+                                               /*profile_compilation_info*/nullptr));
       }
 
       std::vector<OutputStream*> rodata;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d1ac139..fb5560b 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -32,6 +32,7 @@
 #include "class_linker-inl.h"
 #include "compiled_method.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "driver/compiler_driver.h"
 #include "elf_file.h"
 #include "elf_utils.h"
@@ -51,6 +52,7 @@
 #include "lock_word.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
@@ -696,7 +698,7 @@
   return true;
 }
 
-class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
+class ImageWriter::ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
  public:
   bool operator()(ObjPtr<Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     StackHandleScope<1> hs(Thread::Current());
@@ -757,7 +759,8 @@
   if (klass->GetStatus() == mirror::Class::kStatusError) {
     result = true;
   } else {
-    CHECK(klass->GetVerifyError() == nullptr) << klass->PrettyClass();
+    ObjPtr<mirror::ClassExt> ext(klass->GetExtData());
+    CHECK(ext.IsNull() || ext->GetVerifyError() == nullptr) << klass->PrettyClass();
   }
   if (!result) {
     // Check interfaces since these wont be visited through VisitReferences.)
@@ -835,7 +838,7 @@
   return true;
 }
 
-class NonImageClassesVisitor : public ClassVisitor {
+class ImageWriter::NonImageClassesVisitor : public ClassVisitor {
  public:
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
@@ -888,9 +891,9 @@
     }
     ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-      Class* klass = dex_cache->GetResolvedType(i);
+      Class* klass = dex_cache->GetResolvedType(dex::TypeIndex(i));
       if (klass != nullptr && !KeepClass(klass)) {
-        dex_cache->SetResolvedType(i, nullptr);
+        dex_cache->SetResolvedType(dex::TypeIndex(i), nullptr);
       }
     }
     ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
@@ -963,21 +966,21 @@
 mirror::String* ImageWriter::FindInternedString(mirror::String* string) {
   Thread* const self = Thread::Current();
   for (const ImageInfo& image_info : image_infos_) {
-    mirror::String* const found = image_info.intern_table_->LookupStrong(self, string);
+    ObjPtr<mirror::String> const found = image_info.intern_table_->LookupStrong(self, string);
     DCHECK(image_info.intern_table_->LookupWeak(self, string) == nullptr)
         << string->ToModifiedUtf8();
     if (found != nullptr) {
-      return found;
+      return found.Ptr();
     }
   }
   if (compile_app_image_) {
     Runtime* const runtime = Runtime::Current();
-    mirror::String* found = runtime->GetInternTable()->LookupStrong(self, string);
+    ObjPtr<mirror::String> found = runtime->GetInternTable()->LookupStrong(self, string);
     // If we found it in the runtime intern table it could either be in the boot image or interned
     // during app image compilation. If it was in the boot image return that, otherwise return null
     // since it belongs to another image space.
-    if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found)) {
-      return found;
+    if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found.Ptr())) {
+      return found.Ptr();
     }
     DCHECK(runtime->GetInternTable()->LookupWeak(self, string) == nullptr)
         << string->ToModifiedUtf8();
@@ -1088,7 +1091,8 @@
       mirror::String* interned = FindInternedString(obj->AsString());
       if (interned == nullptr) {
         // Not in another image space, insert to our table.
-        interned = GetImageInfo(oat_index).intern_table_->InternStrongImageString(obj->AsString());
+        interned =
+            GetImageInfo(oat_index).intern_table_->InternStrongImageString(obj->AsString()).Ptr();
         DCHECK_EQ(interned, obj);
       }
     } else if (obj->IsDexCache()) {
@@ -1447,8 +1451,9 @@
     InternTable* const intern_table = runtime->GetInternTable();
     for (size_t i = 0, count = dex_file->NumStringIds(); i < count; ++i) {
       uint32_t utf16_length;
-      const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(i, &utf16_length);
-      mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data);
+      const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i),
+                                                                      &utf16_length);
+      mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data).Ptr();
       TryAssignBinSlot(work_stack, string, oat_index);
     }
   }
@@ -1698,7 +1703,7 @@
   return reinterpret_cast<ArtMethod*>(image_info.image_begin_ + it->second.offset);
 }
 
-class FixupRootVisitor : public RootVisitor {
+class ImageWriter::FixupRootVisitor : public RootVisitor {
  public:
   explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {
   }
@@ -1941,7 +1946,7 @@
 }
 
 // Rewrite all the references in the copied object to point to their image address equivalent
-class FixupVisitor {
+class ImageWriter::FixupVisitor {
  public:
   FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) {
   }
@@ -1977,7 +1982,7 @@
   mirror::Object* const copy_;
 };
 
-class FixupClassVisitor FINAL : public FixupVisitor {
+class ImageWriter::FixupClassVisitor FINAL : public FixupVisitor {
  public:
   FixupClassVisitor(ImageWriter* image_writer, Object* copy) : FixupVisitor(image_writer, copy) {
   }
@@ -2042,7 +2047,7 @@
   }
 }
 
-class NativeLocationVisitor {
+class ImageWriter::NativeLocationVisitor {
  public:
   explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
@@ -2067,13 +2072,8 @@
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != nullptr);
   DCHECK(copy != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    orig->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      // Note the address 'copy' isn't the same as the image address of 'orig'.
-      copy->SetReadBarrierPointer(GetImageAddress(orig));
-      DCHECK_EQ(copy->GetReadBarrierPointer(), GetImageAddress(orig));
-    }
+  if (kUseBakerReadBarrier) {
+    orig->AssertReadBarrierState();
   }
   auto* klass = orig->GetClass();
   if (klass->IsIntArrayClass() || klass->IsLongArrayClass()) {
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index c9cf4cb..24fad46 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -582,14 +582,15 @@
   // Map of dex files to the indexes of oat files that they were compiled into.
   const std::unordered_map<const DexFile*, size_t>& dex_file_oat_index_map_;
 
-  friend class ContainsBootClassLoaderNonImageClassVisitor;
-  friend class FixupClassVisitor;
-  friend class FixupRootVisitor;
-  friend class FixupVisitor;
+  class ComputeLazyFieldsForClassesVisitor;
+  class FixupClassVisitor;
+  class FixupRootVisitor;
+  class FixupVisitor;
   class GetRootsVisitor;
-  friend class NativeLocationVisitor;
-  friend class NonImageClassesVisitor;
+  class NativeLocationVisitor;
+  class NonImageClassesVisitor;
   class VisitReferencesVisitor;
+
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
 
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index b617387..9bd25d8 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -108,13 +108,21 @@
   V(StringCompareTo, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "compareTo", "(Ljava/lang/String;)I") \
   V(StringEquals, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "equals", "(Ljava/lang/Object;)Z") \
   V(StringGetCharsNoCheck, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "getCharsNoCheck", "(II[CI)V") \
-  V(StringIndexOf, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(I)I") \
-  V(StringIndexOfAfter, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(II)I") \
+  V(StringIndexOf, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "indexOf", "(I)I") \
+  V(StringIndexOfAfter, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "indexOf", "(II)I") \
+  V(StringStringIndexOf, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(Ljava/lang/String;)I") \
+  V(StringStringIndexOfAfter, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(Ljava/lang/String;I)I") \
   V(StringIsEmpty, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "isEmpty", "()Z") \
   V(StringLength, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "length", "()I") \
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromBytes", "([BIII)Ljava/lang/String;") \
   V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromChars", "(II[C)Ljava/lang/String;") \
   V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromString", "(Ljava/lang/String;)Ljava/lang/String;") \
+  V(StringBufferAppend, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuffer;", "append", "(Ljava/lang/String;)Ljava/lang/StringBuffer;") \
+  V(StringBufferLength, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/StringBuffer;", "length", "()I") \
+  V(StringBufferToString, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuffer;", "toString", "()Ljava/lang/String;") \
+  V(StringBuilderAppend, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuilder;", "append", "(Ljava/lang/String;)Ljava/lang/StringBuilder;") \
+  V(StringBuilderLength, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/StringBuilder;", "length", "()I") \
+  V(StringBuilderToString, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuilder;", "toString", "()Ljava/lang/String;") \
   V(UnsafeCASInt, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapInt", "(Ljava/lang/Object;JII)Z") \
   V(UnsafeCASLong, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapLong", "(Ljava/lang/Object;JJJ)Z") \
   V(UnsafeCASObject, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapObject", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z") \
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index f83d37c..9dfb434 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -171,19 +171,10 @@
 
   size_t thread_count = compiler_driver_->GetThreadCount();
   if (compiler_options_->GetGenerateDebugInfo()) {
-#ifdef ART_TARGET_ANDROID
-    const char* prefix = "/data/misc/trace";
-#else
-    const char* prefix = "/tmp";
-#endif
     DCHECK_EQ(thread_count, 1u)
         << "Generating debug info only works with one compiler thread";
-    std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
-    perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
-    if (perf_file_ == nullptr) {
-      LOG(ERROR) << "Could not create perf file at " << perf_filename <<
-                    " Are you on a user build? Perf only works on userdebug/eng builds";
-    }
+    jit_logger_.reset(new JitLogger());
+    jit_logger_->OpenLog();
   }
 
   size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
@@ -192,9 +183,8 @@
 }
 
 JitCompiler::~JitCompiler() {
-  if (perf_file_ != nullptr) {
-    UNUSED(perf_file_->Flush());
-    UNUSED(perf_file_->Close());
+  if (jit_logger_ != nullptr) {  // Only created when debug info generation was requested.
+    jit_logger_->CloseLog();
   }
 }
 
@@ -218,19 +208,8 @@
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method, osr);
-    if (success && (perf_file_ != nullptr)) {
-      const void* ptr = method->GetEntryPointFromQuickCompiledCode();
-      std::ostringstream stream;
-      stream << std::hex
-             << reinterpret_cast<uintptr_t>(ptr)
-             << " "
-             << code_cache->GetMemorySizeOfCodePointer(ptr)
-             << " "
-             << method->PrettyMethod()
-             << std::endl;
-      std::string str = stream.str();
-      bool res = perf_file_->WriteFully(str.c_str(), str.size());
-      CHECK(res);
+    if (success && (jit_logger_ != nullptr)) {
+      jit_logger_->WriteLog(code_cache, method);
     }
   }
 
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index ea2747c..f0f24d3 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"
 #include "compiled_method.h"
+#include "jit_logger.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 
@@ -50,7 +51,7 @@
   std::unique_ptr<CumulativeLogger> cumulative_logger_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
-  std::unique_ptr<File> perf_file_;
+  std::unique_ptr<JitLogger> jit_logger_;
 
   JitCompiler();
 
diff --git a/compiler/jit/jit_logger.cc b/compiler/jit/jit_logger.cc
new file mode 100644
index 0000000..9ce3b0c
--- /dev/null
+++ b/compiler/jit/jit_logger.cc
@@ -0,0 +1,312 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jit_logger.h"
+
+#include "arch/instruction_set.h"
+#include "art_method-inl.h"
+#include "base/time_utils.h"
+#include "base/unix_file/fd_file.h"
+#include "driver/compiler_driver.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+
+namespace art {
+namespace jit {
+
+#ifdef ART_TARGET_ANDROID
+static const char* kLogPrefix = "/data/misc/trace";
+#else
+static const char* kLogPrefix = "/tmp";
+#endif
+
+// File format of perf-PID.map:
+// +---------------------+
+// |ADDR SIZE symbolname1|
+// |ADDR SIZE symbolname2|
+// |...                  |
+// +---------------------+
+void JitLogger::OpenPerfMapLog() {
+  std::string pid_str = std::to_string(getpid());
+  std::string perf_filename = std::string(kLogPrefix) + "/perf-" + pid_str + ".map";
+  perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
+  if (perf_file_ == nullptr) {
+    LOG(ERROR) << "Could not create perf file at " << perf_filename <<
+      " Are you on a user build? Perf only works on userdebug/eng builds";
+  }
+}
+
+void JitLogger::WritePerfMapLog(JitCodeCache* code_cache, ArtMethod* method) {
+  if (perf_file_ != nullptr) {
+    const void* ptr = method->GetEntryPointFromQuickCompiledCode();
+    size_t code_size = code_cache->GetMemorySizeOfCodePointer(ptr);
+    std::string method_name = method->PrettyMethod();
+
+    std::ostringstream stream;
+    stream << std::hex
+           << reinterpret_cast<uintptr_t>(ptr)
+           << " "
+           << code_size
+           << " "
+           << method_name
+           << std::endl;
+    std::string str = stream.str();
+    bool res = perf_file_->WriteFully(str.c_str(), str.size());
+    if (!res) {
+      LOG(WARNING) << "Failed to write jitted method info in log: write failure.";
+    }
+  } else {
+    LOG(WARNING) << "Failed to write jitted method info in log: log file doesn't exist.";
+  }
+}
+
+void JitLogger::ClosePerfMapLog() {
+  if (perf_file_ != nullptr) {
+    UNUSED(perf_file_->Flush());
+    UNUSED(perf_file_->Close());
+  }
+}
+
+//  File format of jit-PID.dump:
+//
+//  +--------------------------------+
+//  |  PerfJitHeader                 |
+//  +--------------------------------+
+//  |  PerfJitCodeLoad {             | .
+//  |    struct PerfJitBase;         |  .
+//  |    uint32_t process_id_;       |   .
+//  |    uint32_t thread_id_;        |   .
+//  |    uint64_t vma_;              |   .
+//  |    uint64_t code_address_;     |   .
+//  |    uint64_t code_size_;        |   .
+//  |    uint64_t code_id_;          |   .
+//  |  }                             |   .
+//  +-                              -+   .
+//  |  method_name'\0'               |   +--> one jitted method
+//  +-                              -+   .
+//  |  jitted code binary            |   .
+//  |  ...                           |   .
+//  +--------------------------------+   .
+//  |  PerfJitCodeDebugInfo     {    |   .
+//  |    struct PerfJitBase;         |   .
+//  |    uint64_t address_;          |   .
+//  |    uint64_t entry_count_;      |   .
+//  |    struct PerfJitDebugEntry;   |  .
+//  |  }                             | .
+//  +--------------------------------+
+//  |  PerfJitCodeLoad               |
+//     ...
+//
+struct PerfJitHeader {
+  uint32_t magic_;            // Characters "JiTD"
+  uint32_t version_;          // Header version
+  uint32_t size_;             // Total size of header
+  uint32_t elf_mach_target_;  // Elf mach target
+  uint32_t reserved_;         // Reserved, currently not used
+  uint32_t process_id_;       // Process ID of the JIT compiler
+  uint64_t time_stamp_;       // Timestamp when the header is generated
+  uint64_t flags_;            // Currently the flags are only used for choosing clock for timestamp,
+                              // we set it to 0 to tell perf that we use CLOCK_MONOTONIC clock.
+  static const uint32_t kMagic = 0x4A695444;  // "JiTD"
+  static const uint32_t kVersion = 1;
+};
+
+// Each record starts with such basic information: event type, total size, and timestamp.
+struct PerfJitBase {
+  enum PerfJitEvent {
+    // A jitted code load event.
+    // In ART JIT, it logs that a new method is jit compiled and committed to jit-code-cache.
+    // Note that such kLoad event supports code cache GC in ART JIT.
+    // For every kLoad event recorded in jit-PID.dump and every perf sample recorded in perf.data,
+    // each event/sample has time stamp. In case code cache GC happens in ART JIT, and a new
+    // jitted method is committed to the same address of a previously deleted method,
+    // the time stamp information can help profiler to tell whether this sample belongs to the
+    // era of the first jitted method, or does it belong to the period of the second jitted method.
+    // JitCodeCache doesn't have to record any event on 'code delete'.
+    kLoad = 0,
+
+    // A jitted code move event, i.e. a jitted code moved from one address to another address.
+    // It helps profiler to map samples to the right symbol even when the code is moved.
+    // In ART JIT, this event can help log such behavior:
+    // A jitted method is recorded in previous kLoad event, but due to some reason,
+    // it is moved to another address in jit-code-cache.
+    kMove = 1,
+
+    // Logs debug line/column information.
+    kDebugInfo = 2,
+
+    // Logs JIT VM end of life event.
+    kClose = 3
+  };
+  uint32_t event_;       // Must be one of the events defined in PerfJitEvent.
+  uint32_t size_;        // Total size of this event record.
+                         // For example, for kLoad event, size of the event record is:
+                         // sizeof(PerfJitCodeLoad) + method_name.size() + 1 + compiled code size.
+  uint64_t time_stamp_;  // Timestamp for the event.
+};
+
+// Logs a jitted code load event (kLoad).
+// In ART JIT, it is used to log that a new method is jit compiled and committed to jit-code-cache.
+struct PerfJitCodeLoad : PerfJitBase {
+  uint32_t process_id_;    // Process ID who performs the jit code load.
+                           // In ART JIT, it is the pid of the JIT compiler.
+  uint32_t thread_id_;     // Thread ID who performs the jit code load.
+                           // In ART JIT, it is the tid of the JIT compiler.
+  uint64_t vma_;           // Address of the code section. In ART JIT, because code_address_
+                           // uses absolute address, this field is 0.
+  uint64_t code_address_;  // Address where the jitted code is loaded.
+  uint64_t code_size_;     // Size of the jitted code.
+  uint64_t code_id_;       // Unique ID for each jitted code.
+};
+
+// This structure is for source line/column mapping.
+// Currently this feature is not implemented in ART JIT yet.
+struct PerfJitDebugEntry {
+  uint64_t address_;      // Code address which maps to the line/column in source.
+  uint32_t line_number_;  // Source line number starting at 1.
+  uint32_t column_;       // Column discriminator, default 0.
+  const char name_[0];    // Followed by null-terminated name or \0xff\0 if same as previous.
+};
+
+// Logs debug line information (kDebugInfo).
+// This structure is for source line/column mapping.
+// Currently this feature is not implemented in ART JIT yet.
+struct PerfJitCodeDebugInfo : PerfJitBase {
+  uint64_t address_;              // Starting code address which the debug info describes.
+  uint64_t entry_count_;          // How many instances of PerfJitDebugEntry.
+  PerfJitDebugEntry entries_[0];  // Followed by entry_count_ instances of PerfJitDebugEntry.
+};
+
+static uint32_t GetElfMach() {
+#if defined(__arm__)
+  static const uint32_t kElfMachARM = 0x28;
+  return kElfMachARM;
+#elif defined(__aarch64__)
+  static const uint32_t kElfMachARM64 = 0xB7;
+  return kElfMachARM64;
+#elif defined(__i386__)
+  static const uint32_t kElfMachIA32 = 0x3;
+  return kElfMachIA32;
+#elif defined(__x86_64__)
+  static const uint32_t kElfMachX64 = 0x3E;
+  return kElfMachX64;
+#else
+  UNIMPLEMENTED(WARNING) << "Unsupported architecture in JitLogger";
+  return 0;
+#endif
+}
+
+void JitLogger::OpenMarkerFile() {
+  int fd = jit_dump_file_->Fd();
+  // The 'perf inject' tool requires that the jit-PID.dump file
+  // must have a mmap(PROT_READ|PROT_EXEC) record in perf.data.
+  marker_address_ = mmap(nullptr, kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0);
+  if (marker_address_ == MAP_FAILED) {
+    LOG(WARNING) << "Failed to create record in perf.data. JITed code profiling will not work.";
+    marker_address_ = nullptr;  // MAP_FAILED is not nullptr; keeps CloseMarkerFile() from munmap.
+  }
+}
+
+void JitLogger::CloseMarkerFile() {
+  if (marker_address_ != nullptr) {
+    munmap(marker_address_, kPageSize);
+  }
+}
+
+void JitLogger::WriteJitDumpDebugInfo() {
+  // In the future, we can add java source file line/column mapping here.
+}
+
+void JitLogger::WriteJitDumpHeader() {
+  PerfJitHeader header;
+
+  std::memset(&header, 0, sizeof(header));
+  header.magic_ = PerfJitHeader::kMagic;
+  header.version_ = PerfJitHeader::kVersion;
+  header.size_ = sizeof(header);
+  header.elf_mach_target_ = GetElfMach();
+  header.process_id_ = static_cast<uint32_t>(getpid());
+  header.time_stamp_ = art::NanoTime();  // CLOCK_MONOTONIC clock is required.
+  header.flags_ = 0;
+
+  bool res = jit_dump_file_->WriteFully(reinterpret_cast<const char*>(&header), sizeof(header));
+  if (!res) {
+    LOG(WARNING) << "Failed to write profiling log. The 'perf inject' tool will not work.";
+  }
+}
+
+void JitLogger::OpenJitDumpLog() {
+  std::string pid_str = std::to_string(getpid());
+  std::string jitdump_filename = std::string(kLogPrefix) + "/jit-" + pid_str + ".dump";
+
+  jit_dump_file_.reset(OS::CreateEmptyFile(jitdump_filename.c_str()));
+  if (jit_dump_file_ == nullptr) {
+    LOG(ERROR) << "Could not create jit dump file at " << jitdump_filename <<
+      " Are you on a user build? Perf only works on userdebug/eng builds";
+    return;
+  }
+
+  OpenMarkerFile();
+
+  // Keep writing the jit-PID.dump file even if the OpenMarkerFile() call above fails.
+  // Even if that means the 'perf inject' tool cannot work, developers can still use other tools
+  // to map the samples in perf.data to the information (symbol, address, code) recorded
+  // in the jit-PID.dump file, and still proceed with the jitted code analysis.
+  WriteJitDumpHeader();
+}
+
+void JitLogger::WriteJitDumpLog(JitCodeCache* code_cache, ArtMethod* method) {
+  if (jit_dump_file_ != nullptr) {
+    const void* code = method->GetEntryPointFromQuickCompiledCode();
+    size_t code_size = code_cache->GetMemorySizeOfCodePointer(code);
+    std::string method_name = method->PrettyMethod();
+
+    PerfJitCodeLoad jit_code;
+    std::memset(&jit_code, 0, sizeof(jit_code));
+    jit_code.event_ = PerfJitCodeLoad::kLoad;
+    jit_code.size_ = sizeof(jit_code) + method_name.size() + 1 + code_size;
+    jit_code.time_stamp_ = art::NanoTime();    // CLOCK_MONOTONIC clock is required.
+    jit_code.process_id_ = static_cast<uint32_t>(getpid());
+    jit_code.thread_id_ = static_cast<uint32_t>(art::GetTid());
+    jit_code.vma_ = 0x0;
+    jit_code.code_address_ = reinterpret_cast<uint64_t>(code);
+    jit_code.code_size_ = code_size;
+    jit_code.code_id_ = code_index_++;
+
+    // Write one complete jitted method info, including:
+    // - PerfJitCodeLoad structure
+    // - Method name
+    // - Complete generated code of this method
+    //
+    // Use UNUSED() here to avoid compiler warnings.
+    UNUSED(jit_dump_file_->WriteFully(reinterpret_cast<const char*>(&jit_code), sizeof(jit_code)));
+    UNUSED(jit_dump_file_->WriteFully(method_name.c_str(), method_name.size() + 1));
+    UNUSED(jit_dump_file_->WriteFully(code, code_size));
+
+    WriteJitDumpDebugInfo();
+  }
+}
+
+void JitLogger::CloseJitDumpLog() {
+  if (jit_dump_file_ != nullptr) {
+    CloseMarkerFile();
+    UNUSED(jit_dump_file_->Flush());
+    UNUSED(jit_dump_file_->Close());
+  }
+}
+
+}  // namespace jit
+}  // namespace art
diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h
new file mode 100644
index 0000000..0f8cfe4
--- /dev/null
+++ b/compiler/jit/jit_logger.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_JIT_JIT_LOGGER_H_
+#define ART_COMPILER_JIT_JIT_LOGGER_H_
+
+#include "base/mutex.h"
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
+
+namespace art {
+
+class ArtMethod;
+
+namespace jit {
+
+//
+// JitLogger supports two approaches of perf profiling.
+//
+// (1) perf-map:
+//     The perf-map mechanism generates perf-PID.map file,
+//     which provides simple "address, size, method_name" information to perf,
+//     and allows perf to map samples in jit-code-cache to jitted method symbols.
+//
+//     Command line Example:
+//       $ perf record dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test
+//       $ perf report
+//     NOTE:
+//       - Make sure that the perf-PID.map file is available for 'perf report' tool to access,
+//         so that jitted method can be displayed.
+//
+//
+// (2) perf-inject:
+//     The perf-inject mechanism generates jit-PID.dump file,
+//     which provides rich information about a jitted method.
+//     It allows perf or other profiling tools to do advanced analysis on jitted code,
+//     for example instruction level profiling.
+//
+//     Command line Example:
+//       $ perf record -k mono dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test
+//       $ perf inject -i perf.data -o perf.data.jitted
+//       $ perf report -i perf.data.jitted
+//       $ perf annotate -i perf.data.jitted
+//     NOTE:
+//       REQUIREMENTS
+//       - The 'perf record -k mono' option requires 4.1 (or higher) Linux kernel.
+//       - The 'perf inject' (generating jit ELF files feature) requires perf 4.6 (or higher).
+//       PERF RECORD
+//       - The '-k mono' option tells 'perf record' to use CLOCK_MONOTONIC clock during sampling;
+//         which is required by 'perf inject', to make sure that both perf.data and jit-PID.dump
+//         have unified clock source for timestamps.
+//       PERF INJECT
+//       - The 'perf inject' tool injects information from jit-PID.dump into perf.data file,
+//         and generates small ELF files (jitted-TID-CODEID.so) for each jitted method.
+//       - On Android devices, the jit-PID.dump file is generated in /data/misc/trace/ folder, and
+//         such location is recorded in perf.data file.
+//         The 'perf inject' tool looks for jit-PID.dump and generates small ELF files in
+//         this /data/misc/trace/ folder.
+//         Make sure that you have the read/write access to /data/misc/trace/ folder.
+//       - On non-Android devices, the jit-PID.dump file is generated in /tmp/ folder, and
+//         'perf inject' tool operates on this folder.
+//         Make sure that you have the read/write access to /tmp/ folder.
+//       - If you are executing 'perf inject' on non-Android devices (host), but perf.data and
+//         jit-PID.dump files are adb-pulled from Android devices, make sure that there is a
+//         /data/misc/trace/ folder on host, and jit-PID.dump file is copied to this folder.
+//       - Currently 'perf inject' doesn't provide option to change the path for jit-PID.dump and
+//         generated ELF files.
+//       PERF ANNOTATE
+//       - The 'perf annotate' tool displays assembly level profiling report.
+//         Source code can also be displayed if the ELF file has debug symbols.
+//       - Make sure above small ELF files are available for 'perf annotate' tool to access,
+//         so that jitted code can be displayed in assembly view.
+//
+class JitLogger {
+  public:
+    JitLogger() : code_index_(0), marker_address_(nullptr) {}
+
+    void OpenLog() {
+      OpenPerfMapLog();
+      OpenJitDumpLog();
+    }
+
+    void WriteLog(JitCodeCache* code_cache, ArtMethod* method)
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      WritePerfMapLog(code_cache, method);
+      WriteJitDumpLog(code_cache, method);
+    }
+
+    void CloseLog() {
+      ClosePerfMapLog();
+      CloseJitDumpLog();
+    }
+
+  private:
+    // For perf-map profiling
+    void OpenPerfMapLog();
+    void WritePerfMapLog(JitCodeCache* code_cache, ArtMethod* method)
+        REQUIRES_SHARED(Locks::mutator_lock_);
+    void ClosePerfMapLog();
+
+    // For perf-inject profiling
+    void OpenJitDumpLog();
+    void WriteJitDumpLog(JitCodeCache* code_cache, ArtMethod* method)
+        REQUIRES_SHARED(Locks::mutator_lock_);
+    void CloseJitDumpLog();
+
+    void OpenMarkerFile();
+    void CloseMarkerFile();
+    void WriteJitDumpHeader();
+    void WriteJitDumpDebugInfo();
+
+    std::unique_ptr<File> perf_file_;
+    std::unique_ptr<File> jit_dump_file_;
+    uint64_t code_index_;
+    void* marker_address_;
+
+    DISALLOW_COPY_AND_ASSIGN(JitLogger);
+};
+
+}  // namespace jit
+}  // namespace art
+
+#endif  // ART_COMPILER_JIT_JIT_LOGGER_H_
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 4960a73..21042a3 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -20,6 +20,7 @@
 #include <math.h>
 
 #include "art_method-inl.h"
+#include "base/bit_utils.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiler.h"
@@ -366,7 +367,9 @@
   void StackArgsIntsFirstImpl();
   void StackArgsFloatsFirstImpl();
   void StackArgsMixedImpl();
+#if defined(__mips__) && defined(__LP64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
   void StackArgsSignExtendedMips64Impl();
+#endif
 
   void NormalNativeImpl();
   void FastNativeImpl();
@@ -2145,50 +2148,43 @@
 
 JNI_TEST_CRITICAL(StackArgsMixed)
 
-void Java_MyClassNatives_stackArgsSignExtendedMips64(JNIEnv*, jclass, jint i1, jint i2, jint i3,
-                                                     jint i4, jint i5, jint i6, jint i7, jint i8) {
-  EXPECT_EQ(i1, 1);
-  EXPECT_EQ(i2, 2);
-  EXPECT_EQ(i3, 3);
-  EXPECT_EQ(i4, 4);
-  EXPECT_EQ(i5, 5);
-  EXPECT_EQ(i6, 6);
-  EXPECT_EQ(i7, 7);
-  EXPECT_EQ(i8, -8);
-
 #if defined(__mips__) && defined(__LP64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-  // Mips64 ABI requires that arguments passed through stack be sign-extended 8B slots.
-  // First 8 arguments are passed through registers, check i7 and i8.
-  uint32_t stack1_high = *(&i7 + 1);
-  uint32_t stack2_high = *(&i8 + 1);
-
-  EXPECT_EQ(stack1_high, static_cast<uint32_t>(0));
-  EXPECT_EQ(stack2_high, static_cast<uint32_t>(0xffffffff));
-#else
-  LOG(INFO) << "Skipping stackArgsSignExtendedMips64 as there is nothing to be done on "
-            << kRuntimeISA;
-  // Force-print to std::cout so it's also outside the logcat.
-  std::cout << "Skipping stackArgsSignExtendedMips64 as there is nothing to be done on "
-            << kRuntimeISA << std::endl;
-#endif
+// Function will fetch the last argument passed from caller that is now on top of the stack and
+// return it as an 8B long. That way we can test if the caller has properly sign-extended the
+// value when placing it on the stack.
+__attribute__((naked))
+jlong Java_MyClassNatives_getStackArgSignExtendedMips64(
+    JNIEnv*, jclass,                      // Arguments passed from caller
+    jint, jint, jint, jint, jint, jint,   // through regs a0 to a7.
+    jint) {                               // The last argument will be passed on the stack.
+  __asm__(
+      ".set noreorder\n\t"                // Just return and store 8 bytes from the top of the stack
+      "jr  $ra\n\t"                       // in v0 (in branch delay slot). This should be the last
+      "ld  $v0, 0($sp)\n\t");             // argument. It is a 32-bit int, but it should be sign
+                                          // extended and it occupies 64-bit location.
 }
 
 void JniCompilerTest::StackArgsSignExtendedMips64Impl() {
-  SetUpForTest(true, "stackArgsSignExtendedMips64", "(IIIIIIII)V",
-               CURRENT_JNI_WRAPPER(Java_MyClassNatives_stackArgsSignExtendedMips64));
-  jint i1 = 1;
-  jint i2 = 2;
-  jint i3 = 3;
-  jint i4 = 4;
-  jint i5 = 5;
-  jint i6 = 6;
-  jint i7 = 7;
-  jint i8 = -8;
+  uint64_t ret;
+  SetUpForTest(true,
+               "getStackArgSignExtendedMips64",
+               "(IIIIIII)J",
+               // Don't use wrapper because this is raw assembly function.
+               reinterpret_cast<void*>(&Java_MyClassNatives_getStackArgSignExtendedMips64));
 
-  env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8);
+  // Mips64 ABI requires that arguments passed through stack be sign-extended 8B slots.
+  // First 8 arguments are passed through registers.
+  // Final argument's value is 7. When sign-extended, higher stack bits should be 0.
+  ret = env_->CallStaticLongMethod(jklass_, jmethod_, 1, 2, 3, 4, 5, 6, 7);
+  EXPECT_EQ(High32Bits(ret), static_cast<uint32_t>(0));
+
+  // Final argument's value is -8.  When sign-extended, higher stack bits should be 0xffffffff.
+  ret = env_->CallStaticLongMethod(jklass_, jmethod_, 1, 2, 3, 4, 5, 6, -8);
+  EXPECT_EQ(High32Bits(ret), static_cast<uint32_t>(0xffffffff));
 }
 
-JNI_TEST_CRITICAL(StackArgsSignExtendedMips64)
+JNI_TEST(StackArgsSignExtendedMips64)
+#endif
 
 void Java_MyClassNatives_normalNative(JNIEnv*, jclass) {
   // Intentionally left empty.
@@ -2202,8 +2198,7 @@
                "()V",
                CURRENT_JNI_WRAPPER(Java_MyClassNatives_normalNative));
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
@@ -2225,8 +2220,7 @@
                "()V",
                CURRENT_JNI_WRAPPER(Java_MyClassNatives_fastNative));
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
@@ -2255,8 +2249,7 @@
   UpdateCurrentJni(JniKind::kCritical);
   ASSERT_TRUE(IsCurrentJniCritical());
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_TRUE(method->IsAnnotatedWithCriticalNative());
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 3fb7b56..33f4d77 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -222,7 +222,11 @@
                                                      bool is_synchronized,
                                                      bool is_critical_native,
                                                      const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, is_critical_native, shorty, kArm64PointerSize) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kArm64PointerSize) {
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index 9859b5d..36a87a8 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -152,24 +152,6 @@
                                                                    bool is_critical_native,
                                                                    const char* shorty,
                                                                    InstructionSet instruction_set) {
-  if (UNLIKELY(is_critical_native)) {
-    // Sanity check that the requested JNI instruction set
-    // is supported for critical natives. Not every one is.
-    switch (instruction_set) {
-      case kX86_64:
-      case kX86:
-      case kArm64:
-      case kArm:
-      case kThumb2:
-        break;
-      default:
-        is_critical_native = false;
-        LOG(WARNING) << "@CriticalNative support not implemented for " << instruction_set
-                     << "; will crash at runtime if trying to invoke such a method.";
-        // TODO: implement for MIPS/MIPS64
-    }
-  }
-
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
@@ -191,12 +173,18 @@
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) mips::MipsJniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) mips::MipsJniCallingConvention(is_static,
+                                                     is_synchronized,
+                                                     is_critical_native,
+                                                     shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) mips64::Mips64JniCallingConvention(is_static,
+                                                         is_synchronized,
+                                                         is_critical_native,
+                                                         shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index f541d8f..335a2df 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -370,14 +370,6 @@
     kObjectOrClass = 1
   };
 
-  // TODO: remove this constructor once all are changed to the below one.
-  JniCallingConvention(bool is_static,
-                       bool is_synchronized,
-                       const char* shorty,
-                       PointerSize frame_pointer_size)
-      : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size),
-        is_critical_native_(false) {}
-
   JniCallingConvention(bool is_static,
                        bool is_synchronized,
                        bool is_critical_native,
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index f5ab5f7..e6948ec 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -23,6 +23,13 @@
 namespace art {
 namespace mips {
 
+// Up to how many float-like (float, double) args can be enregistered in floating-point registers.
+// The rest of the args must go in integer registers or on the stack.
+constexpr size_t kMaxFloatOrDoubleRegisterArguments = 2u;
+// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be
+// enregistered. The rest of the args must go on the stack.
+constexpr size_t kMaxIntLikeRegisterArguments = 4u;
+
 static const Register kCoreArgumentRegisters[] = { A0, A1, A2, A3 };
 static const FRegister kFArgumentRegisters[] = { F12, F14 };
 static const DRegister kDArgumentRegisters[] = { D6, D7 };
@@ -170,23 +177,134 @@
 }
 // JNI calling convention
 
-MipsJniCallingConvention::MipsJniCallingConvention(bool is_static, bool is_synchronized,
+MipsJniCallingConvention::MipsJniCallingConvention(bool is_static,
+                                                   bool is_synchronized,
+                                                   bool is_critical_native,
                                                    const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kMipsPointerSize) {
-  // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
-  // or jclass for static methods and the JNIEnv. We start at the aligned register A2.
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kMipsPointerSize) {
+  // SYSTEM V - Application Binary Interface (MIPS RISC Processor):
+  // Data Representation - Fundamental Types (3-4) specifies fundamental alignments for each type.
+  //   "Each member is assigned to the lowest available offset with the appropriate alignment. This
+  // may require internal padding, depending on the previous member."
+  //
+  // All of our stack arguments are usually 4-byte aligned, however longs and doubles must be 8
+  // bytes aligned. Add padding to maintain 8-byte alignment invariant.
+  //
+  // Compute padding to ensure longs and doubles are not split in o32.
   size_t padding = 0;
-  for (size_t cur_arg = IsStatic() ? 0 : 1, cur_reg = 2; cur_arg < NumArgs(); cur_arg++) {
+  size_t cur_arg, cur_reg;
+  if (LIKELY(HasExtraArgumentsForJni())) {
+    // Ignore the 'this' jobject or jclass for static methods and the JNIEnv.
+    // We start at the aligned register A2.
+    //
+    // Ignore the first 2 parameters because they are guaranteed to be aligned.
+    cur_arg = NumImplicitArgs();  // Skip the "this" argument.
+    cur_reg = 2;  // Skip {A0=JNIEnv, A1=jobject} / {A0=JNIEnv, A1=jclass} parameters (start at A2).
+  } else {
+    // Check every parameter.
+    cur_arg = 0;
+    cur_reg = 0;
+  }
+
+  // Shift across a logical register mapping that looks like:
+  //
+  //   | A0 | A1 | A2 | A3 | SP+16 | SP+20 | SP+24 | ... | SP+n | SP+n+4 |
+  //
+  //   or one of its variants with floating-point registers (F12 and F14), for example
+  //
+  //   | F12     | F14 | A3 | SP+16 | SP+20 | SP+24 | ... | SP+n | SP+n+4 |
+  //
+  //   (where SP is the stack pointer at the start of called function).
+  //
+  // Any time there would normally be a long/double in an odd logical register,
+  // we have to push out the rest of the mappings by 4 bytes to maintain an 8-byte alignment.
+  //
+  // This works for both physical register pairs {A0, A1}, {A2, A3},
+  // floating-point registers F12, F14 and for when the value is on the stack.
+  //
+  // For example:
+  // (a) long would normally go into A1, but we shift it into A2
+  //  | INT | (PAD) | LONG    |
+  //  | A0  |  A1   | A2 | A3 |
+  //
+  // (b) long would normally go into A3, but we shift it into SP
+  //  | INT | INT | INT | (PAD) | LONG        |
+  //  | A0  | A1  | A2  |  A3   | SP+16 SP+20 |
+  //
+  // where INT is any <=4 byte arg, and LONG is any 8-byte arg.
+  for (; cur_arg < NumArgs(); cur_arg++) {
     if (IsParamALongOrDouble(cur_arg)) {
       if ((cur_reg & 1) != 0) {
         padding += 4;
-        cur_reg++;  // additional bump to ensure alignment
+        cur_reg++;   // Additional bump to ensure alignment.
       }
-      cur_reg++;  // additional bump to skip extra long word
+      cur_reg += 2;  // Bump the iterator twice for every long argument.
+    } else {
+      cur_reg++;     // Bump the iterator for every argument.
     }
-    cur_reg++;  // bump the iterator for every argument
   }
-  padding_ = padding;
+  if (cur_reg < kMaxIntLikeRegisterArguments) {
+    // As a special case when, as a result of shifting (or not) there are no arguments on the stack,
+    // we actually have 0 stack padding.
+    //
+    // For example with @CriticalNative and:
+    // (int, long) -> shifts the long but doesn't need to pad the stack
+    //
+    //          shift
+    //           \/
+    //  | INT | (PAD) | LONG      | (EMPTY) ...
+    //  | A0  |  A1   |  A2  | A3 |   SP    ...
+    //                                /\
+    //                          no stack padding
+    padding_ = 0;
+  } else {
+    padding_ = padding;
+  }
+
+  // Argument Passing (3-17):
+  //   "When the first argument is integral, the remaining arguments are passed in the integer
+  // registers."
+  //
+  //   "The rules that determine which arguments go into registers and which ones must be passed on
+  // the stack are most easily explained by considering the list of arguments as a structure,
+  // aligned according to normal structure rules. Mapping of this structure into the combination of
+  // stack and registers is as follows: up to two leading floating-point arguments can be passed in
+  // $f12 and $f14; everything else with a structure offset greater than or equal to 16 is passed on
+  // the stack. The remainder of the arguments are passed in $4..$7 based on their structure offset.
+  // Holes left in the structure for alignment are unused, whether in registers or in the stack."
+  //
+  // For example with @CriticalNative and:
+  // (a) first argument is not floating-point, so all go into integer registers
+  //  | INT | FLOAT | DOUBLE  |
+  //  | A0  |  A1   | A2 | A3 |
+  // (b) first argument is floating-point, but 2nd is integer
+  //  | FLOAT | INT | DOUBLE  |
+  //  |  F12  | A1  | A2 | A3 |
+  // (c) first two arguments are floating-point (float, double)
+  //  | FLOAT | (PAD) | DOUBLE |  INT  |
+  //  |  F12  |       |  F14   | SP+16 |
+  // (d) first two arguments are floating-point (double, float)
+  //  | DOUBLE | FLOAT | INT |
+  //  |  F12   |  F14  | A3  |
+  // (e) first three arguments are floating-point, but just first two will go into fp registers
+  //  | DOUBLE | FLOAT | FLOAT |
+  //  |  F12   |  F14  |  A3   |
+  //
+  // Find out if the first argument is a floating-point. In that case, floating-point registers will
+  // be used for up to two leading floating-point arguments. Otherwise, all arguments will be passed
+  // using integer registers.
+  use_fp_arg_registers_ = false;
+  if (is_critical_native) {
+    if (NumArgs() > 0) {
+      if (IsParamAFloatOrDouble(0)) {
+        use_fp_arg_registers_ = true;
+      }
+    }
+  }
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
@@ -202,74 +320,127 @@
 }
 
 size_t MipsJniCallingConvention::FrameSize() {
-  // ArtMethod*, RA and callee save area size, local reference segment state
-  size_t frame_data_size = static_cast<size_t>(kMipsPointerSize) +
-      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kMipsPointerSize, ReferenceCount());
-  // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+  // ArtMethod*, RA and callee save area size, local reference segment state.
+  const size_t method_ptr_size = static_cast<size_t>(kMipsPointerSize);
+  const size_t ra_return_addr_size = kFramePointerSize;
+  const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+
+  size_t frame_data_size = method_ptr_size + ra_return_addr_size + callee_save_area_size;
+
+  if (LIKELY(HasLocalReferenceSegmentState())) {
+    // Local reference segment state.
+    frame_data_size += kFramePointerSize;
+  }
+
+  // References plus 2 words for HandleScope header.
+  const size_t handle_scope_size = HandleScope::SizeOf(kMipsPointerSize, ReferenceCount());
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;    // Handle scope size.
+  }
+
+  // Plus return value spill area size.
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t MipsJniCallingConvention::OutArgSize() {
-  return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_, kStackAlignment);
+  // Argument Passing (3-17):
+  //   "Despite the fact that some or all of the arguments to a function are passed in registers,
+  // always allocate space on the stack for all arguments. This stack space should be a structure
+  // large enough to contain all the arguments, aligned according to normal structure rules (after
+  // promotion and structure return pointer insertion). The locations within the stack frame used
+  // for arguments are called the home locations."
+  //
+  // Allocate 16 bytes for home locations + space needed for stack arguments.
+  return RoundUp(
+      (kMaxIntLikeRegisterArguments + NumberOfOutgoingStackArgs()) * kFramePointerSize + padding_,
+      kStackAlignment);
 }
 
 ArrayRef<const ManagedRegister> MipsJniCallingConvention::CalleeSaveRegisters() const {
   return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
 }
 
-// JniCallingConvention ABI follows AAPCS where longs and doubles must occur
-// in even register numbers and stack slots
+// JniCallingConvention ABI follows o32 where longs and doubles must occur
+// in even register numbers and stack slots.
 void MipsJniCallingConvention::Next() {
   JniCallingConvention::Next();
-  size_t arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) &&
-      (arg_pos < NumArgs()) &&
-      IsParamALongOrDouble(arg_pos)) {
-    // itr_slots_ needs to be an even number, according to AAPCS.
-    if ((itr_slots_ & 0x1u) != 0) {
+
+  if (LIKELY(HasNext())) {  // Avoid CHECK failure for IsCurrentParam
+    // Ensure slot is 8-byte aligned for longs/doubles (o32).
+    if (IsCurrentParamALongOrDouble() && ((itr_slots_ & 0x1u) != 0)) {
+      // itr_slots_ needs to be an even number, according to o32.
       itr_slots_++;
     }
   }
 }
 
 bool MipsJniCallingConvention::IsCurrentParamInRegister() {
-  return itr_slots_ < 4;
+  // Argument Passing (3-17):
+  //   "The rules that determine which arguments go into registers and which ones must be passed on
+  // the stack are most easily explained by considering the list of arguments as a structure,
+  // aligned according to normal structure rules. Mapping of this structure into the combination of
+  // stack and registers is as follows: up to two leading floating-point arguments can be passed in
+  // $f12 and $f14; everything else with a structure offset greater than or equal to 16 is passed on
+  // the stack. The remainder of the arguments are passed in $4..$7 based on their structure offset.
+  // Holes left in the structure for alignment are unused, whether in registers or in the stack."
+  //
+  // Even when floating-point registers are used, there can be up to 4 arguments passed in
+  // registers.
+  return itr_slots_ < kMaxIntLikeRegisterArguments;
 }
 
 bool MipsJniCallingConvention::IsCurrentParamOnStack() {
   return !IsCurrentParamInRegister();
 }
 
-static const Register kJniArgumentRegisters[] = {
-  A0, A1, A2, A3
-};
 ManagedRegister MipsJniCallingConvention::CurrentParamRegister() {
-  CHECK_LT(itr_slots_, 4u);
-  int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) && IsParamALongOrDouble(arg_pos)) {
-    CHECK_EQ(itr_slots_, 2u);
-    return MipsManagedRegister::FromRegisterPair(A2_A3);
+  CHECK_LT(itr_slots_, kMaxIntLikeRegisterArguments);
+  // Up to two leading floating-point arguments can be passed in floating-point registers.
+  if (use_fp_arg_registers_ && (itr_args_ < kMaxFloatOrDoubleRegisterArguments)) {
+    if (IsCurrentParamAFloatOrDouble()) {
+      if (IsCurrentParamADouble()) {
+        return MipsManagedRegister::FromDRegister(kDArgumentRegisters[itr_args_]);
+      } else {
+        return MipsManagedRegister::FromFRegister(kFArgumentRegisters[itr_args_]);
+      }
+    }
+  }
+  // All other arguments (including other floating-point arguments) will be passed in integer
+  // registers.
+  if (IsCurrentParamALongOrDouble()) {
+    if (itr_slots_ == 0u) {
+      return MipsManagedRegister::FromRegisterPair(A0_A1);
+    } else {
+      CHECK_EQ(itr_slots_, 2u);
+      return MipsManagedRegister::FromRegisterPair(A2_A3);
+    }
   } else {
-    return
-      MipsManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_slots_]);
+    return MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[itr_slots_]);
   }
 }
 
 FrameOffset MipsJniCallingConvention::CurrentParamStackOffset() {
-  CHECK_GE(itr_slots_, 4u);
+  CHECK_GE(itr_slots_, kMaxIntLikeRegisterArguments);
   size_t offset = displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
 
 size_t MipsJniCallingConvention::NumberOfOutgoingStackArgs() {
-  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
-  // regular argument parameters and this
-  size_t param_args = NumArgs() + NumLongOrDoubleArgs();
-  // count JNIEnv*
-  return static_args + param_args + 1;
+  size_t static_args = HasSelfClass() ? 1 : 0;            // Count jclass.
+  // Regular argument parameters and this.
+  size_t param_args = NumArgs() + NumLongOrDoubleArgs();  // Twice count 8-byte args.
+  // Count JNIEnv* less arguments in registers.
+  size_t internal_args = (HasJniEnv() ? 1 : 0);
+  size_t total_args = static_args + param_args + internal_args;
+
+  return total_args - std::min(kMaxIntLikeRegisterArguments, static_cast<size_t>(total_args));
 }
+
 }  // namespace mips
 }  // namespace art
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index e95a738..ad3f118 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -54,14 +54,17 @@
 
 class MipsJniCallingConvention FINAL : public JniCallingConvention {
  public:
-  MipsJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  MipsJniCallingConvention(bool is_static,
+                           bool is_synchronized,
+                           bool is_critical_native,
+                           const char* shorty);
   ~MipsJniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
   ManagedRegister IntReturnRegister() OVERRIDE;
   ManagedRegister InterproceduralScratchRegister() OVERRIDE;
   // JNI calling convention
-  void Next() OVERRIDE;  // Override default behavior for AAPCS
+  void Next() OVERRIDE;  // Override default behavior for o32.
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
   ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
@@ -82,8 +85,9 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // Padding to ensure longs and doubles are not split in AAPCS
+  // Padding to ensure longs and doubles are not split in o32.
   size_t padding_;
+  bool use_fp_arg_registers_;  // True if leading float/double args use FP registers.
 
   DISALLOW_COPY_AND_ASSIGN(MipsJniCallingConvention);
 };
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 8341e8e..afe6a76 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -23,6 +23,9 @@
 namespace art {
 namespace mips64 {
 
+// Up to how many args can be enregistered. The rest of the args must go on the stack.
+constexpr size_t kMaxRegisterArguments = 8u;
+
 static const GpuRegister kGpuArgumentRegisters[] = {
   A0, A1, A2, A3, A4, A5, A6, A7
 };
@@ -150,9 +153,15 @@
 
 // JNI calling convention
 
-Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
+Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static,
+                                                       bool is_synchronized,
+                                                       bool is_critical_native,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kMips64PointerSize) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kMips64PointerSize) {
 }
 
 uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
@@ -168,13 +177,28 @@
 }
 
 size_t Mips64JniCallingConvention::FrameSize() {
-  // ArtMethod*, RA and callee save area size, local reference segment state
-  size_t frame_data_size = kFramePointerSize +
-      (CalleeSaveRegisters().size() + 1) * kFramePointerSize + sizeof(uint32_t);
-  // References plus 2 words for HandleScope header
+  // ArtMethod*, RA and callee save area size, local reference segment state.
+  size_t method_ptr_size = static_cast<size_t>(kFramePointerSize);
+  size_t ra_and_callee_save_area_size = (CalleeSaveRegisters().size() + 1) * kFramePointerSize;
+
+  size_t frame_data_size = method_ptr_size + ra_and_callee_save_area_size;
+  if (LIKELY(HasLocalReferenceSegmentState())) {                     // Local ref. segment state.
+    // Local reference segment state is sometimes excluded.
+    frame_data_size += sizeof(uint32_t);
+  }
+  // References plus 2 words for HandleScope header.
   size_t handle_scope_size = HandleScope::SizeOf(kMips64PointerSize, ReferenceCount());
-  // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;                                 // Handle scope size.
+  }
+
+  // Plus return value spill area size.
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t Mips64JniCallingConvention::OutArgSize() {
@@ -186,7 +210,7 @@
 }
 
 bool Mips64JniCallingConvention::IsCurrentParamInRegister() {
-  return itr_args_ < 8;
+  return itr_args_ < kMaxRegisterArguments;
 }
 
 bool Mips64JniCallingConvention::IsCurrentParamOnStack() {
@@ -204,7 +228,8 @@
 
 FrameOffset Mips64JniCallingConvention::CurrentParamStackOffset() {
   CHECK(IsCurrentParamOnStack());
-  size_t offset = displacement_.Int32Value() - OutArgSize() + ((itr_args_ - 8) * kFramePointerSize);
+  size_t args_on_stack = itr_args_ - kMaxRegisterArguments;
+  size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
@@ -214,7 +239,7 @@
   size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
 
   // Nothing on the stack unless there are more than 8 arguments
-  return (all_args > 8) ? all_args - 8 : 0;
+  return (all_args > kMaxRegisterArguments) ? all_args - kMaxRegisterArguments : 0;
 }
 }  // namespace mips64
 }  // namespace art
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index a5fd111..faedaef 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -54,7 +54,10 @@
 
 class Mips64JniCallingConvention FINAL : public JniCallingConvention {
  public:
-  Mips64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  Mips64JniCallingConvention(bool is_static,
+                             bool is_synchronized,
+                             bool is_critical_native,
+                             const char* shorty);
   ~Mips64JniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 0151789..233daf4 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -163,7 +163,8 @@
                                                offset + patch.LiteralOffset(),
                                                target_offset);
           } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) {
-            uint32_t target_offset = string_index_to_offset_map_.Get(patch.TargetStringIndex());
+            uint32_t target_offset =
+                string_index_to_offset_map_.Get(patch.TargetStringIndex().index_);
             patcher_->PatchPcRelativeReference(&patched_code_,
                                                patch,
                                                offset + patch.LiteralOffset(),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 64ee574..9458576 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -125,7 +125,9 @@
                 SafeMap<std::string, std::string>& key_value_store,
                 bool verify) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    OatWriter oat_writer(/*compiling_boot_image*/false,
+                         &timings,
+                         /*profile_compilation_info*/nullptr);
     for (const DexFile* dex_file : dex_files) {
       ArrayRef<const uint8_t> raw_dex_file(
           reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
@@ -145,7 +147,9 @@
                 SafeMap<std::string, std::string>& key_value_store,
                 bool verify) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    OatWriter oat_writer(/*compiling_boot_image*/false,
+                         &timings,
+                         /*profile_compilation_info*/nullptr);
     for (const char* dex_filename : dex_filenames) {
       if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
         return false;
@@ -161,7 +165,9 @@
                 SafeMap<std::string, std::string>& key_value_store,
                 bool verify) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    OatWriter oat_writer(/*compiling_boot_image*/false,
+                         &timings,
+                         /*profile_compilation_info*/nullptr);
     if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
       return false;
     }
@@ -377,7 +383,8 @@
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
     compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
-    compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
+    compiler_driver_->CompileAll(
+        class_loader, class_linker->GetBootClassPath(), /* verifier_deps */ nullptr, &timings2);
   }
 
   ScratchFile tmp_oat, tmp_vdex(tmp_oat, ".vdex");
@@ -391,7 +398,8 @@
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
-    compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
+    compiler_driver_->CompileAll(
+        class_loader, class_linker->GetBootClassPath(), /* verifier_deps */ nullptr, &timings);
   }
   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp_oat.GetFilename(),
                                                   tmp_oat.GetFilename(),
@@ -515,7 +523,7 @@
                                   soa.Decode<mirror::ClassLoader>(class_loader).Ptr());
   }
   compiler_driver_->SetDexFilesForOatFile(dex_files);
-  compiler_driver_->CompileAll(class_loader, dex_files, &timings);
+  compiler_driver_->CompileAll(class_loader, dex_files, /* verifier_deps */ nullptr, &timings);
 
   ScratchFile tmp_oat, tmp_vdex(tmp_oat, ".vdex");
   SafeMap<std::string, std::string> key_value_store;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 6cbca7a..a7a451f 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -33,6 +33,7 @@
 #include "debug/method_debug_info.h"
 #include "dex/verification_results.h"
 #include "dex_file-inl.h"
+#include "dexlayout.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "gc/space/image_space.h"
@@ -260,16 +261,7 @@
   // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
-  void InitTypeLookupTable(const DexFile& dex_file, uint8_t* storage) const {
-    lookup_table_.reset(TypeLookupTable::Create(dex_file, storage));
-  }
-
-  TypeLookupTable* GetTypeLookupTable() const {
-    return lookup_table_.get();
-  }
-
  private:
-  mutable std::unique_ptr<TypeLookupTable> lookup_table_;
   size_t GetClassOffsetsRawSize() const {
     return class_offsets_.size() * sizeof(class_offsets_[0]);
   }
@@ -285,7 +277,7 @@
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCompilationInfo* info)
   : write_state_(WriteState::kAddingDexFileSources),
     timings_(timings),
     raw_dex_files_(),
@@ -346,7 +338,8 @@
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
     relative_patcher_(nullptr),
-    absolute_patch_locations_() {
+    absolute_patch_locations_(),
+    profile_compilation_info_(info) {
 }
 
 bool OatWriter::AddDexFileSource(const char* filename,
@@ -410,6 +403,35 @@
   return true;
 }
 
+// Add dex file source(s) from a vdex file given as a VdexFile object.
+bool OatWriter::AddVdexDexFilesSource(const VdexFile& vdex_file,
+                                      const char* location,
+                                      CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  const uint8_t* current_dex_data = nullptr;
+  for (size_t i = 0; ; ++i) {
+    current_dex_data = vdex_file.GetNextDexFileData(current_dex_data);
+    if (current_dex_data == nullptr) {
+      break;
+    }
+    if (!DexFile::IsMagicValid(current_dex_data)) {
+      LOG(ERROR) << "Invalid magic in vdex file created from " << location;
+      return false;
+    }
+    // We use `zipped_dex_file_locations_` to keep the location strings alive in memory.
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+    const char* full_location = zipped_dex_file_locations_.back().c_str();
+    oat_dex_files_.emplace_back(full_location,
+                                DexFileSource(current_dex_data),
+                                create_type_lookup_table);
+  }
+  if (oat_dex_files_.empty()) {
+    LOG(ERROR) << "No dex files in vdex file created from " << location;
+    return false;
+  }
+  return true;
+}
+
 // Add dex file source from raw memory.
 bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
                                     const char* location,
@@ -771,7 +793,7 @@
       // Update quick method header.
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
-      uint32_t vmap_table_offset = method_header->vmap_table_offset_;
+      uint32_t vmap_table_offset = method_header->GetVmapTableOffset();
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
@@ -913,7 +935,7 @@
       // If vdex is enabled, we only emit the stack map of compiled code. The quickening info will
       // be in the vdex file.
       if (!compiled_method->GetQuickCode().empty() || !kIsVdexEnabled) {
-        DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
+        DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset(), 0u);
 
         ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
         uint32_t map_size = map.size() * sizeof(map[0]);
@@ -927,7 +949,7 @@
               });
           // Code offset is not initialized yet, so set the map offset to 0u-offset.
           DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
-          oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
+          oat_class->method_headers_[method_offsets_index_].SetVmapTableOffset(0u - offset);
         }
       }
       ++method_offsets_index_;
@@ -1384,7 +1406,7 @@
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
-      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].vmap_table_offset_;
+      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset();
       uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
       ++method_offsets_index_;
 
@@ -1759,7 +1781,7 @@
   }
 
   std::vector<uint8_t> buffer;
-  verifier_deps->Encode(&buffer);
+  verifier_deps->Encode(*dex_files_, &buffer);
 
   if (!vdex_out->WriteFully(buffer.data(), buffer.size())) {
     PLOG(ERROR) << "Failed to write verifier deps."
@@ -2090,7 +2112,11 @@
   if (!SeekToDexFile(out, file, oat_dex_file)) {
     return false;
   }
-  if (oat_dex_file->source_.IsZipEntry()) {
+  if (profile_compilation_info_ != nullptr) {
+    if (!LayoutAndWriteDexFile(out, oat_dex_file)) {
+      return false;
+    }
+  } else if (oat_dex_file->source_.IsZipEntry()) {
     if (!WriteDexFile(out, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) {
       return false;
     }
@@ -2155,6 +2181,39 @@
   return true;
 }
 
+bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_file) {
+  TimingLogger::ScopedTiming split("Dex Layout", timings_);
+  std::string error_msg;
+  std::string location(oat_dex_file->GetLocation());
+  std::unique_ptr<const DexFile> dex_file;
+  if (oat_dex_file->source_.IsZipEntry()) {
+    ZipEntry* zip_entry = oat_dex_file->source_.GetZipEntry();
+    std::unique_ptr<MemMap> mem_map(
+        zip_entry->ExtractToMemMap(location.c_str(), "classes.dex", &error_msg));
+    dex_file = DexFile::Open(location,
+                             zip_entry->GetCrc32(),
+                             std::move(mem_map),
+                             /* verify */ true,
+                             /* verify_checksum */ true,
+                             &error_msg);
+  } else {
+    DCHECK(oat_dex_file->source_.IsRawFile());
+    File* raw_file = oat_dex_file->source_.GetRawFile();
+    dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg);
+  }
+  Options options;
+  options.output_to_memmap_ = true;
+  DexLayout dex_layout(options, profile_compilation_info_, nullptr);
+  dex_layout.ProcessDexFile(location.c_str(), dex_file.get(), 0);
+  std::unique_ptr<MemMap> mem_map(dex_layout.GetAndReleaseMemMap());
+  if (!WriteDexFile(out, oat_dex_file, mem_map->Begin())) {
+    return false;
+  }
+  // Set the checksum of the new oat dex file to be the original file's checksum.
+  oat_dex_file->dex_file_location_checksum_ = dex_file->GetLocationChecksum();
+  return true;
+}
+
 bool OatWriter::WriteDexFile(OutputStream* out,
                              File* file,
                              OatDexFile* oat_dex_file,
@@ -2481,8 +2540,15 @@
 
     // Create the lookup table. When `nullptr` is given as the storage buffer,
     // TypeLookupTable allocates its own and OatDexFile takes ownership.
-    oat_dex_file->InitTypeLookupTable(*opened_dex_files[i], /* storage */ nullptr);
-    TypeLookupTable* table = oat_dex_file->GetTypeLookupTable();
+    const DexFile& dex_file = *opened_dex_files[i];
+    {
+      std::unique_ptr<TypeLookupTable> type_lookup_table =
+          TypeLookupTable::Create(dex_file, /* storage */ nullptr);
+      type_lookup_table_oat_dex_files_.push_back(
+          std::make_unique<art::OatDexFile>(std::move(type_lookup_table)));
+      dex_file.SetOatDexFile(type_lookup_table_oat_dex_files_.back().get());
+    }
+    TypeLookupTable* const table = type_lookup_table_oat_dex_files_.back()->GetTypeLookupTable();
 
     // Type tables are required to be 4 byte aligned.
     size_t initial_offset = oat_size_;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 3d08ad3..0dcf79e 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -38,9 +38,11 @@
 class CompiledMethod;
 class CompilerDriver;
 class ImageWriter;
+class ProfileCompilationInfo;
 class OutputStream;
 class TimingLogger;
 class TypeLookupTable;
+class VdexFile;
 class ZipEntry;
 
 namespace debug {
@@ -110,12 +112,13 @@
     kDefault = kCreate
   };
 
-  OatWriter(bool compiling_boot_image, TimingLogger* timings);
+  OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCompilationInfo* info);
 
   // To produce a valid oat file, the user must first add sources with any combination of
   //   - AddDexFileSource(),
   //   - AddZippedDexFilesSource(),
-  //   - AddRawDexFileSource().
+  //   - AddRawDexFileSource(),
+  //   - AddVdexDexFilesSource().
   // Then the user must call in order
   //   - WriteAndOpenDexFiles()
   //   - Initialize()
@@ -144,6 +147,11 @@
       const char* location,
       uint32_t location_checksum,
       CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source(s) from a vdex file.
+  bool AddVdexDexFilesSource(
+      const VdexFile& vdex_file,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
   dchecked_vector<const char*> GetSourceLocations() const;
 
   // Write raw dex files to the vdex file, mmap the file and open the dex files from it.
@@ -258,6 +266,7 @@
   bool WriteDexFiles(OutputStream* out, File* file);
   bool WriteDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file);
   bool SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file);
+  bool LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_file);
   bool WriteDexFile(OutputStream* out,
                     File* file,
                     OatDexFile* oat_dex_file,
@@ -363,6 +372,9 @@
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
+  // Fake OatDexFiles to hold type lookup tables for the compiler.
+  std::vector<std::unique_ptr<art::OatDexFile>> type_lookup_table_oat_dex_files_;
+
   // data to write
   std::unique_ptr<OatHeader> oat_header_;
   dchecked_vector<OatDexFile> oat_dex_files_;
@@ -419,6 +431,9 @@
   // The locations of absolute patches relative to the start of the executable section.
   dchecked_vector<uintptr_t> absolute_patch_locations_;
 
+  // Profile info used to generate new layout of files.
+  ProfileCompilationInfo* profile_compilation_info_;
+
   DISALLOW_COPY_AND_ASSIGN(OatWriter);
 };
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d2357a5..7dc094b 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -548,7 +548,21 @@
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
     first_index_bounds_check_map_.clear();
-    HGraphVisitor::VisitBasicBlock(block);
+    // Visit phis and instructions using a safe iterator. The iteration protects
+    // against deleting the current instruction during iteration. However, it
+    // must advance next_ if that instruction is deleted during iteration.
+    for (HInstruction* instruction = block->GetFirstPhi(); instruction != nullptr;) {
+      DCHECK(instruction->IsInBlock());
+      next_ = instruction->GetNext();
+      instruction->Accept(this);
+      instruction = next_;
+    }
+    for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
+      DCHECK(instruction->IsInBlock());
+      next_ = instruction->GetNext();
+      instruction->Accept(this);
+      instruction = next_;
+    }
     // We should never deoptimize from an osr method, otherwise we might wrongly optimize
     // code dominated by the deoptimization.
     if (!GetGraph()->IsCompilingOsr()) {
@@ -1798,7 +1812,12 @@
   }
 
   /** Helper method to replace an instruction with another instruction. */
-  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+  void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    // Safe iteration.
+    if (instruction == next_) {
+      next_ = next_->GetNext();
+    }
+    // Replace and remove.
     instruction->ReplaceWith(replacement);
     instruction->GetBlock()->RemoveInstruction(instruction);
   }
@@ -1831,6 +1850,9 @@
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
+  // Safe iteration.
+  HInstruction* next_;
+
   DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
 };
 
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index b7c24ff..dfa1504 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -70,9 +70,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -167,9 +167,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -231,9 +231,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -295,7 +295,7 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_5 = graph_->GetIntConstant(5);
@@ -364,7 +364,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -479,7 +479,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -600,7 +600,7 @@
       constant_10,
       graph->GetCurrentMethod(),
       0,
-      Primitive::kPrimInt,
+      dex::TypeIndex(static_cast<uint16_t>(Primitive::kPrimInt)),
       graph->GetDexFile(),
       kQuickAllocArray);
   block->AddInstruction(new_array);
@@ -692,7 +692,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -795,7 +795,7 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_0 = graph_->GetIntConstant(0);
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
index 6dfffce..133afa4 100644
--- a/compiler/optimizing/bytecode_utils.h
+++ b/compiler/optimizing/bytecode_utils.h
@@ -26,7 +26,8 @@
 
 class CodeItemIterator : public ValueObject {
  public:
-  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u)
+  explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {}
+  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc)
       : code_ptr_(code_item.insns_ + start_dex_pc),
         code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
         dex_pc_(start_dex_pc) {}
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8b450e1..fa6a522 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -304,6 +304,7 @@
     SetFrameSize(RoundUp(
         first_register_slot_in_slow_path_
         + maximum_safepoint_spill_size
+        + (GetGraph()->HasShouldDeoptimizeFlag() ? kShouldDeoptimizeFlagSize : 0)
         + FrameEntrySpillSize(),
         kStackAlignment));
   }
@@ -1375,4 +1376,33 @@
   return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
 }
 
+void CodeGenerator::EmitJitRoots(uint8_t* code,
+                                 Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                 const uint8_t* roots_data,
+                                 Handle<mirror::DexCache> outer_dex_cache) {
+  DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots());
+  StackHandleScope<1> hs(Thread::Current());
+  MutableHandle<mirror::DexCache> h_dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  size_t index = 0;
+  for (auto& entry : jit_string_roots_) {
+    const DexFile& entry_dex_file = *entry.first.dex_file;
+    // Avoid the expensive FindDexCache call by checking if the string is
+    // in the compiled method's dex file.
+    h_dex_cache.Assign(IsSameDexFile(*outer_dex_cache->GetDexFile(), entry_dex_file)
+        ? outer_dex_cache.Get()
+        : class_linker->FindDexCache(hs.Self(), entry_dex_file));
+    mirror::String* string = class_linker->LookupString(
+        entry_dex_file, entry.first.string_index, h_dex_cache);
+    DCHECK(string != nullptr) << "JIT roots require strings to have been loaded";
+    // Ensure the string is strongly interned. This is a requirement on how the JIT
+    // handles strings. b/32995596
+    class_linker->GetInternTable()->InternStrong(string);
+    roots->Set(index, string);
+    entry.second = index;
+    ++index;
+  }
+  EmitJitRootPatches(code, roots_data);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a81f24e..4b11e7c 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,7 +30,9 @@
 #include "memory_region.h"
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
+#include "read_barrier_option.h"
 #include "stack_map_stream.h"
+#include "string_reference.h"
 #include "utils/label.h"
 
 namespace art {
@@ -50,6 +52,9 @@
 // Maximum value for a primitive long.
 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
 
+static constexpr ReadBarrierOption kCompilerReadBarrierOption =
+    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+
 class Assembler;
 class CodeGenerator;
 class CompilerDriver;
@@ -302,6 +307,12 @@
     return POPCOUNT(GetSlowPathSpills(locations, core_registers));
   }
 
+  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
+    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
+    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
+    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
+  }
+
   // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
   // Check whether we have already recorded mapping at this PC.
@@ -331,6 +342,17 @@
 
   void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
   size_t ComputeStackMapsSize();
+  size_t GetNumberOfJitRoots() const {
+    return jit_string_roots_.size();
+  }
+
+  // Fills the `roots` array with the JIT roots collected during code generation.
+  // Also emits the JIT root patches.
+  void EmitJitRoots(uint8_t* code,
+                    Handle<mirror::ObjectArray<mirror::Object>> roots,
+                    const uint8_t* roots_data,
+                    Handle<mirror::DexCache> outer_dex_cache)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool IsLeafMethod() const {
     return is_leaf_;
@@ -511,6 +533,26 @@
   virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
       HLoadClass::LoadKind desired_class_load_kind) = 0;
 
+  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
+    switch (load->GetLoadKind()) {
+      case HLoadString::LoadKind::kBssEntry:
+        DCHECK(load->NeedsEnvironment());
+        return LocationSummary::kCallOnSlowPath;
+      case HLoadString::LoadKind::kDexCacheViaMethod:
+        DCHECK(load->NeedsEnvironment());
+        return LocationSummary::kCallOnMainOnly;
+      case HLoadString::LoadKind::kJitTableAddress:
+        DCHECK(!load->NeedsEnvironment());
+        return kEmitCompilerReadBarrier
+            ? LocationSummary::kCallOnSlowPath
+            : LocationSummary::kNoCall;
+        break;
+      default:
+        DCHECK(!load->NeedsEnvironment());
+        return LocationSummary::kNoCall;
+    }
+  }
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -567,6 +609,8 @@
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
+        jit_string_roots_(StringReferenceValueComparator(),
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -633,6 +677,12 @@
     return current_slow_path_;
   }
 
+  // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
+  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
+                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
+    DCHECK_EQ(jit_string_roots_.size(), 0u);
+  }
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
@@ -658,6 +708,11 @@
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
+  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
+  // Entries are initially added with a 0 index, and `EmitJitRoots` will compute all the
+  // indices.
+  ArenaSafeMap<StringReference, size_t, StringReferenceValueComparator> jit_string_roots_;
+
   DisassemblyInformation* disasm_info_;
 
  private:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index be65f89..ed6eef1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -383,7 +383,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+    __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
     arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
@@ -430,7 +430,7 @@
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex();
+    const uint32_t string_index = load->GetStringIndex().index_;
     Register out = locations->Out().AsRegister<Register>();
     Register temp = locations->GetTemp(0).AsRegister<Register>();
     constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
@@ -489,8 +489,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -504,26 +502,26 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -614,8 +612,10 @@
 // reference (different from `ref`) in `obj.field`).
 class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
  public:
-  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref)
-      : SlowPathCodeARM(instruction), ref_(ref) {
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction,
+                             Location ref,
+                             Location entrypoint = Location::NoLocation())
+      : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -638,6 +638,11 @@
            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -665,10 +670,15 @@
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
-    // This runtime call does not require a stack map.
-    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    if (entrypoint_.IsValid()) {
+      arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      __ blx(entrypoint_.AsRegister<Register>());
+    } else {
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+      // This runtime call does not require a stack map.
+      arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    }
     __ b(GetExitLabel());
   }
 
@@ -676,6 +686,9 @@
   // The location (register) of the marked object reference.
   const Location ref_;
 
+  // The location of the entrypoint if already loaded.
+  const Location entrypoint_;
+
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
 };
 
@@ -894,6 +907,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1196,7 +1214,9 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_string_patches_(StringReferenceValueComparator(),
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -1309,6 +1329,13 @@
     __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
     __ cfi().RelOffsetForMany(DWARFReg(S0), 0, fpu_spill_mask_, kArmWordSize);
   }
+
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ mov(IP, ShifterOperand(0));
+    __ StoreToOffset(kStoreWord, IP, SP, -kShouldDeoptimizeFlagSize);
+  }
+
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ AddConstant(SP, -adjust);
   __ cfi().AdjustCFAOffset(adjust);
@@ -1924,6 +1951,19 @@
                         /* false_target */ nullptr);
 }
 
+// Location setup for HShouldDeoptimizeFlag: no runtime call, output lives in a register.
+void LocationsBuilderARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(flag, LocationSummary::kNoCall);
+  summary->SetOut(Location::RequiresRegister());
+}
+
+// Emits a word load of the should_deoptimize flag from its SP-relative slot into the output.
+void InstructionCodeGeneratorARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  const Register out = flag->GetLocations()->Out().AsRegister<Register>();
+  const auto flag_offset = codegen_->GetStackOffsetOfShouldDeoptimizeFlag();
+  __ LoadFromOffset(kLoadWord, out, SP, flag_offset);
+}
+
 void LocationsBuilderARM::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -3933,7 +3973,7 @@
 
 void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex().index_);
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
@@ -4841,8 +4881,6 @@
                                         instruction->IsStringCharAt();
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4850,16 +4888,21 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar:
     case Primitive::kPrimInt: {
+      Register length;
+      if (maybe_compressed_char_at) {
+        length = locations->GetTemp(0).AsRegister<Register>();
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
       if (index.IsConstant()) {
         int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
         if (maybe_compressed_char_at) {
-          Register length = IP;
           Label uncompressed_load, done;
-          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
-          __ LoadFromOffset(kLoadWord, length, obj, count_offset);
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ cmp(length, ShifterOperand(0));
-          __ b(&uncompressed_load, GE);
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ b(&uncompressed_load, CS);
           __ LoadFromOffset(kLoadUnsignedByte,
                             out_loc.AsRegister<Register>(),
                             obj,
@@ -4894,12 +4937,10 @@
         }
         if (maybe_compressed_char_at) {
           Label uncompressed_load, done;
-          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
-          Register length = locations->GetTemp(0).AsRegister<Register>();
-          __ LoadFromOffset(kLoadWord, length, obj, count_offset);
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ cmp(length, ShifterOperand(0));
-          __ b(&uncompressed_load, GE);
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ b(&uncompressed_load, CS);
           __ ldrb(out_loc.AsRegister<Register>(),
                   Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
           __ b(&done);
@@ -4915,6 +4956,11 @@
     }
 
     case Primitive::kPrimNot: {
+      // The read barrier instrumentation of object ArrayGet
+      // instructions does not support the HIntermediateAddress
+      // instruction.
+      DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
@@ -5055,8 +5101,6 @@
   Location value_loc = locations->InAt(2);
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -5301,13 +5345,11 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out compression flag from String's array length.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ bic(out, out, ShifterOperand(1u << 31));
+    __ Lsr(out, out, 1u);
   }
 }
 
 void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
 
@@ -5322,9 +5364,6 @@
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
 
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
-
   if (second.IsRegister()) {
     __ add(out.AsRegister<Register>(),
            first.AsRegister<Register>(),
@@ -5724,7 +5763,7 @@
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -5733,7 +5772,9 @@
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
 
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5745,17 +5786,17 @@
                               out_loc,
                               current_method,
                               ArtMethod::DeclaringClassOffset().Int32Value(),
-                              requires_read_barrier);
+                              read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                                     cls->GetTypeIndex()));
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorARM::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       __ BindTrackedLabel(&labels->movw_label);
@@ -5767,7 +5808,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
@@ -5787,7 +5828,7 @@
       uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
       __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5796,7 +5837,7 @@
       HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
       int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
       // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5809,8 +5850,8 @@
                         current_method,
                         ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
       // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier);
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
     }
   }
@@ -5871,12 +5912,12 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -5884,13 +5925,8 @@
 }
 
 void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
-      ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
-          ? LocationSummary::kCallOnMainOnly
-          : LocationSummary::kCallOnSlowPath)
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
     locations->SetOut(Location::RegisterLocation(R0));
@@ -5930,7 +5966,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorARM::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       __ BindTrackedLabel(&labels->movw_label);
       __ movw(out, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
@@ -5949,20 +5985,27 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       Register temp = locations->GetTemp(0).AsRegister<Register>();
       CodeGeneratorARM::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       __ BindTrackedLabel(&labels->movw_label);
       __ movw(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
       __ movt(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->add_pc_label);
       __ add(temp, temp, ShifterOperand(PC));
-      GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kEmitCompilerReadBarrier);
+      GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
       return;
     }
+    case HLoadString::LoadKind::kJitTableAddress: {
+      __ LoadLiteral(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
+                                                                load->GetStringIndex()));
+      // /* GcRoot<mirror::String> */ out = *out
+      GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+      return;
+    }
     default:
       break;
   }
@@ -5971,7 +6014,7 @@
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -6012,12 +6055,26 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+// Number of temps an instanceof of this kind needs: 1 (for the read barrier) or 0.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (!kEmitCompilerReadBarrier) {
+    return 0u;
+  }
+  const bool needs_read_barrier_temp = kUseBakerReadBarrier ||
+      type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+      type_check_kind == TypeCheckKind::kArrayObjectCheck;
+  return needs_read_barrier_temp ? 1u : 0u;
+}
+
+// Number of temps a check-cast of this kind needs.
+// The interface check uses 3: one for the number of interfaces, one for the
+// current interface pointer, and one for loading the current interface.
+// Every other kind uses one temp for loading the object's class, plus whatever
+// NumberOfInstanceOfTemps() reports for that kind.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  const bool is_interface_check = (type_check_kind == TypeCheckKind::kInterfaceCheck);
+  return is_interface_check ? 3u : (1u + NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6049,11 +6106,7 @@
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6064,9 +6117,9 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6080,11 +6133,15 @@
     __ CompareAndBranchIfZero(obj, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       __ cmp(out, ShifterOperand(cls));
       // Classes must be equal for the instanceof to succeed.
       __ b(&zero, NE);
@@ -6094,12 +6151,23 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -6112,13 +6180,24 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       Label loop, success;
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -6131,13 +6210,24 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       Label exact_check;
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -6150,6 +6240,14 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       __ cmp(out, ShifterOperand(cls));
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
@@ -6233,13 +6331,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathARM uses this "temp" register too.
-  locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
@@ -6250,20 +6342,31 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 3u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
-  bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeARM* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
                                                         is_type_check_slow_path_fatal);
@@ -6275,12 +6378,17 @@
     __ CompareAndBranchIfZero(obj, &done);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       __ cmp(temp, ShifterOperand(cls));
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
@@ -6289,34 +6397,44 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop, compare_classes;
+      Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
-      __ CompareAndBranchIfNonZero(temp, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       Label loop;
       __ Bind(&loop);
@@ -6324,65 +6442,52 @@
       __ b(&done, EQ);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // back at the beginning of the loop.
-      __ CompareAndBranchIfNonZero(temp, &loop);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, jump to the beginning of the loop.
+      __ b(&loop);
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
-      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
-
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
-      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array; fall through to further check that this
+      // component type is not a primitive type.
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
-      __ CompareAndBranchIfZero(temp, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
-      //
+      // We always go into the type check slow path for the unresolved check case.
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
       // calling InvokeRuntime directly), as it would require to
@@ -6390,15 +6495,47 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
+
       __ b(type_check_slow_path->GetEntryLabel());
       break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve performance of the fast path. We cannot get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ ldr(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      Label start_loop;
+      __ Bind(&start_loop);
+      __ CompareAndBranchIfZero(maybe_temp2_loc.AsRegister<Register>(),
+                                type_check_slow_path->GetEntryLabel());
+      __ ldr(maybe_temp3_loc.AsRegister<Register>(), Address(temp, object_array_data_offset));
+      __ MaybeUnpoisonHeapReference(maybe_temp3_loc.AsRegister<Register>());
+      // Go to next interface.
+      __ add(temp, temp, ShifterOperand(2 * kHeapReferenceSize));
+      __ sub(maybe_temp2_loc.AsRegister<Register>(),
+             maybe_temp2_loc.AsRegister<Register>(),
+             ShifterOperand(2));
+      // Compare the classes and continue the loop if they do not match.
+      __ cmp(cls, ShifterOperand(maybe_temp3_loc.AsRegister<Register>()));
+      __ b(&start_loop, NE);
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -6672,12 +6809,15 @@
   }
 }
 
-void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                   Location out,
-                                                                   uint32_t offset,
-                                                                   Location maybe_temp) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
@@ -6702,14 +6842,17 @@
   }
 }
 
-void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                    Location out,
-                                                                    Location obj,
-                                                                    uint32_t offset,
-                                                                    Location maybe_temp) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
@@ -6734,17 +6877,18 @@
                                                           Location root,
                                                           Register obj,
                                                           uint32_t offset,
-                                                          bool requires_read_barrier) {
+                                                          ReadBarrierOption read_barrier_option) {
   Register root_reg = root.AsRegister<Register>();
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used:
       //
       //   root = obj.field;
-      //   if (Thread::Current()->GetIsGcMarking()) {
-      //     root = ReadBarrier::Mark(root)
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
       //   }
 
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -6758,14 +6902,23 @@
                     "have different sizes.");
 
       // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(LR);
       SlowPathCodeARM* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
       codegen_->AddSlowPath(slow_path);
 
-      // IP = Thread::Current()->GetIsGcMarking()
-      __ LoadFromOffset(
-          kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value());
-      __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking; this saves one load compared to
+      // checking GetIsGcMarking.
+      __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {
       // GC root loaded through a slow path for read barriers other
@@ -6843,7 +6996,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6919,14 +7072,13 @@
   }
   AddSlowPath(slow_path);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // Given the numeric representation, it's enough to check the low bit of the
   // rb_state. We do that by shifting the bit out of the lock word with LSRS
   // which can be a 16-bit instruction unlike the TST immediate.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
   __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
   __ Bind(slow_path->GetExitLabel());
@@ -7192,8 +7344,8 @@
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
-    const DexFile& dex_file, uint32_t type_index) {
-  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+    const DexFile& dex_file, dex::TypeIndex type_index) {
+  return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
@@ -7208,14 +7360,14 @@
 }
 
 Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                             uint32_t string_index) {
+                                                             dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
 
 Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                           uint32_t type_index) {
+                                                           dex::TypeIndex type_index) {
   return boot_image_type_patches_.GetOrCreate(
       TypeReference(&dex_file, type_index),
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
@@ -7231,6 +7383,14 @@
   return DeduplicateUint32Literal(address, &uint32_literals_);
 }
 
+Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file,
+                                                       dex::StringIndex string_index) {
+  jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u);
+  return jit_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
 inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches(
     const ArenaDeque<PcRelativePatchInfo>& infos,
@@ -7296,7 +7456,7 @@
     uint32_t literal_offset = literal->GetLabel()->Position();
     linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
                                                        target_string.dex_file,
-                                                       target_string.string_index));
+                                                       target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
@@ -7312,7 +7472,7 @@
     uint32_t literal_offset = literal->GetLabel()->Position();
     linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
                                                      target_type.dex_file,
-                                                     target_type.type_index));
+                                                     target_type.type_index.index_));
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                               linker_patches);
@@ -7547,6 +7707,21 @@
   }
 }
 
+void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+  for (const auto& entry : jit_string_patches_) {
+    const auto& it = jit_string_roots_.find(entry.first);
+    DCHECK(it != jit_string_roots_.end());
+    size_t index_in_table = it->second;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+    uint8_t* data = code + literal_offset;
+    reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+  }
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 3d46aab..8230512 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -19,6 +19,7 @@
 
 #include "base/enums.h"
 #include "code_generator.h"
+#include "dex_file_types.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "string_reference.h"
@@ -263,7 +264,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -278,17 +280,18 @@
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location maybe_temp);
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
   //
-  // while honoring read barriers if `requires_read_barrier` is true.
+  // while honoring read barriers based on `read_barrier_option`.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                Register obj,
                                uint32_t offset,
-                               bool requires_read_barrier);
+                               ReadBarrierOption read_barrier_option);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              Label* true_target,
@@ -479,16 +482,20 @@
   };
 
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
-  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
-  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                             dex::StringIndex string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
+  Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index);
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -589,9 +596,9 @@
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
-  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
-                                              Literal*,
-                                              StringReferenceValueComparator>;
+  using StringToLiteralMap = ArenaSafeMap<StringReference,
+                                          Literal*,
+                                          StringReferenceValueComparator>;
   using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
                                             Literal*,
                                             TypeReferenceValueComparator>;
@@ -603,7 +610,6 @@
   PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
                                           uint32_t offset_or_index,
                                           ArenaDeque<PcRelativePatchInfo>* patches);
-
   template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
   static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
                                           ArenaVector<LinkerPatch>* linker_patches);
@@ -628,7 +634,7 @@
   // PC-relative patch info for each HArmDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  BootStringToLiteralMap boot_image_string_patches_;
+  StringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
@@ -638,6 +644,9 @@
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
+  // Patches for string literals in JIT compiled code.
+  StringToLiteralMap jit_string_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b537509..6eebd69 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -288,7 +288,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
+    __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex().index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
     arm64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
@@ -349,7 +349,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index);
     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -459,9 +459,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location class_to_check = locations->InAt(1);
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -476,21 +474,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
-        object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               LocationFrom(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               LocationFrom(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
-                           const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -573,8 +572,10 @@
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
-  CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t),
-                             CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize);
+  vixl::CodeBufferCheckScope scope(codegen->GetVIXLAssembler(),
+                                   num_entries * sizeof(int32_t),
+                                   vixl::CodeBufferCheckScope::kReserveBufferSpace,
+                                   vixl::CodeBufferCheckScope::kExactSize);
 
   __ Bind(&table_start_);
   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
@@ -600,10 +601,16 @@
 // probably still be a from-space reference (unless it gets updated by
 // another thread, or if another thread installed another object
 // reference (different from `ref`) in `obj.field`).
+// If `entrypoint` is a valid location, it is assumed to already hold the entrypoint. The case
+// where the entrypoint is passed in is for the GcRoot read barrier.
 class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref)
-      : SlowPathCodeARM64(instruction), ref_(ref) {
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
+                               Location ref,
+                               Location entrypoint = Location::NoLocation())
+      : SlowPathCodeARM64(instruction),
+        ref_(ref),
+        entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -626,6 +633,11 @@
            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -653,10 +665,16 @@
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
-    // This runtime call does not require a stack map.
-    arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    if (entrypoint_.IsValid()) {
+      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      __ Blr(XRegisterFrom(entrypoint_));
+    } else {
+      // Entrypoint is not already loaded; load it from the thread.
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+      // This runtime call does not require a stack map.
+      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    }
     __ B(GetExitLabel());
   }
 
@@ -664,6 +682,9 @@
   // The location (register) of the marked object reference.
   const Location ref_;
 
+  // The location of the entrypoint if it is already loaded.
+  const Location entrypoint_;
+
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
 };
 
@@ -876,7 +897,9 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
@@ -1137,7 +1160,9 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_string_patches_(StringReferenceValueComparator(),
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -1248,6 +1273,12 @@
         frame_size - GetCoreSpillSize());
     GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
         frame_size - FrameEntrySpillSize());
+
+    if (GetGraph()->HasShouldDeoptimizeFlag()) {
+      // Initialize should_deoptimize flag to 0.
+      Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
+      __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
+    }
   }
 }
 
@@ -2192,8 +2223,6 @@
 }
 
 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -2201,10 +2230,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
-    HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
@@ -2242,10 +2268,10 @@
         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
     if (prev->IsLoadOrStore()) {
       // Make sure we emit only exactly one nop.
-      vixl::aarch64::CodeBufferCheckScope scope(masm,
-                                                kInstructionSize,
-                                                vixl::aarch64::CodeBufferCheckScope::kCheck,
-                                                vixl::aarch64::CodeBufferCheckScope::kExactSize);
+      vixl::CodeBufferCheckScope scope(masm,
+                                       kInstructionSize,
+                                       vixl::CodeBufferCheckScope::kReserveBufferSpace,
+                                       vixl::CodeBufferCheckScope::kExactSize);
       __ nop();
     }
   }
@@ -2304,11 +2330,15 @@
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
+  // The read barrier instrumentation of object ArrayGet instructions
+  // does not support the HIntermediateAddress instruction.
+  DCHECK(!((type == Primitive::kPrimNot) &&
+           instruction->GetArray()->IsIntermediateAddress() &&
+           kEmitCompilerReadBarrier));
+
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2320,13 +2350,22 @@
     if (maybe_compressed_char_at) {
       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
       length = temps.AcquireW();
-      __ Ldr(length, HeapOperand(obj, count_offset));
+      if (instruction->GetArray()->IsIntermediateAddress()) {
+        DCHECK_LT(count_offset, offset);
+        int64_t adjusted_offset = static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
+        // Note that `adjusted_offset` is negative, so this will be a LDUR.
+        __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
+      } else {
+        __ Ldr(length, HeapOperand(obj, count_offset));
+      }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     }
     if (index.IsConstant()) {
       if (maybe_compressed_char_at) {
         vixl::aarch64::Label uncompressed_load, done;
-        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ Tbnz(length.W(), 0, &uncompressed_load);
         __ Ldrb(Register(OutputCPURegister(instruction)),
                 HeapOperand(obj, offset + Int64ConstantFrom(index)));
         __ B(&done);
@@ -2341,9 +2380,6 @@
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2357,7 +2393,9 @@
       }
       if (maybe_compressed_char_at) {
         vixl::aarch64::Label uncompressed_load, done;
-        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ Tbnz(length.W(), 0, &uncompressed_load);
         __ Ldrb(Register(OutputCPURegister(instruction)),
                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
         __ B(&done);
@@ -2402,7 +2440,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out compression flag from String's array length.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX)));
+    __ Lsr(out.W(), out.W(), 1u);
   }
 }
 
@@ -2451,9 +2489,6 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -3206,6 +3241,17 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  // No runtime call is involved (kNoCall); the flag value just needs an output register.
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(flag, LocationSummary::kNoCall);
+  summary->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  const MemOperand flag_slot(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+  __ Ldr(OutputRegister(flag), flag_slot);  // Read the flag from its sp-relative slot.
+}
+
 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
   return condition->IsCondition() &&
          Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
@@ -3314,12 +3360,26 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
       (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+          type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Returns the number of temporaries a CheckCast needs. The interface check uses 3 temps: one
+// holding the number of interfaces, one for the current interface pointer and one for loading
+// the current interface. Every other check needs one temp for loading the object's class, plus
+// whatever NumberOfInstanceOfTemps() reports for the same kind of check.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  return (type_check_kind == TypeCheckKind::kInterfaceCheck)
+      ? 3
+      : 1 + NumberOfInstanceOfTemps(type_check_kind);
 }
 
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3351,11 +3411,8 @@
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // Add temps if necessary for read barriers.
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3366,9 +3423,9 @@
   Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -3383,11 +3440,15 @@
     __ Cbz(obj, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       __ Cmp(out, cls);
       __ Cset(out, eq);
       if (zero.IsLinked()) {
@@ -3397,12 +3458,23 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -3415,13 +3487,24 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -3434,13 +3517,24 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       vixl::aarch64::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -3453,6 +3547,14 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
@@ -3536,13 +3638,8 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
-  locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
@@ -3551,22 +3648,34 @@
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_GE(num_temps, 1u);
+  DCHECK_LE(num_temps, 3u);
   Location temp_loc = locations->GetTemp(0);
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
   Register temp = WRegisterFrom(temp_loc);
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
-  bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+  bool is_type_check_slow_path_fatal = false;
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeARM64* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
                                                           is_type_check_slow_path_fatal);
@@ -3578,12 +3687,17 @@
     __ Cbz(obj, &done);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
@@ -3592,34 +3706,43 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::aarch64::Label loop, compare_classes;
+      vixl::aarch64::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
-      __ Cbnz(temp, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&compare_classes);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, compare classes.
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       vixl::aarch64::Label loop;
       __ Bind(&loop);
@@ -3627,64 +3750,53 @@
       __ B(eq, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ Cbnz(temp, &loop);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
-      vixl::aarch64::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
-      __ Cbnz(temp, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array. Further check that this component type is not a
+      // primitive type.
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbz(temp, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
+      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
+      // We always go into the type check slow path for the unresolved check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -3693,15 +3805,40 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ B(type_check_slow_path->GetEntryLabel());
       break;
+    case TypeCheckKind::kInterfaceCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      vixl::aarch64::Label start_loop;
+      __ Bind(&start_loop);
+      __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
+      __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
+      GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
+      // Go to next interface.
+      __ Add(temp, temp, 2 * kHeapReferenceSize);
+      __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
+      __ B(ne, &start_loop);
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -3918,7 +4055,8 @@
       vixl::aarch64::Label* label = &relative_call_patches_.back().label;
       SingleEmissionCheckScope guard(GetVIXLAssembler());
       __ Bind(label);
-      __ bl(0);  // Branch and link to itself. This will be overriden at link time.
+      // Branch and link to itself. This will be overridden at link time.
+      __ bl(static_cast<int64_t>(0));
       break;
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
@@ -3984,9 +4122,9 @@
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
     const DexFile& dex_file,
-    uint32_t type_index,
+    dex::TypeIndex type_index,
     vixl::aarch64::Label* adrp_label) {
-  return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_);
+  return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
@@ -4011,14 +4149,14 @@
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
-    const DexFile& dex_file, uint32_t string_index) {
+    const DexFile& dex_file, dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
-    const DexFile& dex_file, uint32_t type_index) {
+    const DexFile& dex_file, dex::TypeIndex type_index) {
   return boot_image_type_patches_.GetOrCreate(
       TypeReference(&dex_file, type_index),
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
@@ -4036,12 +4174,20 @@
   return DeduplicateUint64Literal(address);
 }
 
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  const StringReference string_ref(&dex_file, string_index);  // Key for roots and patches.
+  jit_string_roots_.Overwrite(string_ref, /* placeholder */ 0u);
+  return jit_string_patches_.GetOrCreate(  // A newly created literal holds a 0u placeholder.
+      string_ref, [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(0u); });
+}
+
 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
                                              vixl::aarch64::Register reg) {
   DCHECK(reg.IsX());
   SingleEmissionCheckScope guard(GetVIXLAssembler());
   __ Bind(fixup_label);
-  __ adrp(reg, /* offset placeholder */ 0);
+  __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
 }
 
 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -4117,7 +4263,7 @@
     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
     linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
                                                        target_string.dex_file,
-                                                       target_string.string_index));
+                                                       target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
@@ -4131,7 +4277,7 @@
     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
     linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
                                                      target_type.dex_file,
-                                                     target_type.type_index));
+                                                     target_type.type_index.index_));
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
@@ -4255,7 +4401,7 @@
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -4264,7 +4410,9 @@
   Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
 
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -4277,19 +4425,19 @@
                               current_method,
                               ArtMethod::DeclaringClassOffset().Int32Value(),
                               /* fixup_label */ nullptr,
-                              requires_read_barrier);
+                              read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                             cls->GetTypeIndex()));
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       // Add ADRP with its PC-relative type patch.
       const DexFile& dex_file = cls->GetDexFile();
-      uint32_t type_index = cls->GetTypeIndex();
+      dex::TypeIndex type_index = cls->GetTypeIndex();
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
       // Add ADD with its PC-relative type patch.
@@ -4299,7 +4447,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress()));
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
       break;
@@ -4322,7 +4470,7 @@
                               out.X(),
                               offset,
                               /* fixup_label */ nullptr,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -4342,7 +4490,7 @@
                               out.X(),
                               /* offset placeholder */ 0,
                               ldr_label,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -4357,9 +4505,9 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               out.X(),
-                              CodeGenerator::GetCacheOffset(cls->GetTypeIndex()),
+                              CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_),
                               /* fixup_label */ nullptr,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -4414,24 +4562,20 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
   }
   return desired_string_load_kind;
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
-      ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
-          ? LocationSummary::kCallOnMainOnly
-          : LocationSummary::kCallOnSlowPath)
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
     InvokeRuntimeCallingConvention calling_convention;
@@ -4457,6 +4601,7 @@
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
   Register out = OutputRegister(load);
+  Location out_loc = load->GetLocations()->Out();
 
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
@@ -4466,7 +4611,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
-      uint32_t string_index = load->GetStringIndex();
+      uint32_t string_index = load->GetStringIndex().index_;
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
@@ -4484,7 +4629,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       // Add ADRP with its PC-relative String .bss entry patch.
       const DexFile& dex_file = load->GetDexFile();
-      uint32_t string_index = load->GetStringIndex();
+      uint32_t string_index = load->GetStringIndex().index_;
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
       Register temp = temps.AcquireX();
@@ -4493,13 +4638,13 @@
       // Add LDR with its PC-relative String patch.
       vixl::aarch64::Label* ldr_label =
           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
-      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
+      // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(load,
-                              load->GetLocations()->Out(),
+                              out_loc,
                               temp,
                               /* offset placeholder */ 0u,
                               ldr_label,
-                              kEmitCompilerReadBarrier);
+                              kCompilerReadBarrierOption);
       SlowPathCodeARM64* slow_path =
           new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
       codegen_->AddSlowPath(slow_path);
@@ -4507,6 +4652,17 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
+    case HLoadString::LoadKind::kJitTableAddress: {
+      __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
+                                                        load->GetStringIndex()));
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out.X(),
+                              /* offset */ 0,
+                              /* fixup_label */ nullptr,
+                              kCompilerReadBarrierOption);
+      return;
+    }
     default:
       break;
   }
@@ -4514,7 +4670,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
-  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex());
+  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -4639,7 +4795,7 @@
   InvokeRuntimeCallingConvention calling_convention;
   Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
   DCHECK(type_index.Is(w0));
-  __ Mov(type_index, instruction->GetTypeIndex());
+  __ Mov(type_index, instruction->GetTypeIndex().index_);
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
@@ -5201,13 +5357,16 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                     Location out,
-                                                                     uint32_t offset,
-                                                                     Location maybe_temp) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Primitive::Type type = Primitive::kPrimNot;
   Register out_reg = RegisterFrom(out, type);
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     Register temp_reg = RegisterFrom(maybe_temp, type);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
@@ -5237,15 +5396,18 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                      Location out,
-                                                                      Location obj,
-                                                                      uint32_t offset,
-                                                                      Location maybe_temp) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Primitive::Type type = Primitive::kPrimNot;
   Register out_reg = RegisterFrom(out, type);
   Register obj_reg = RegisterFrom(obj, type);
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       Register temp_reg = RegisterFrom(maybe_temp, type);
@@ -5271,23 +5433,25 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
-                                                            Location root,
-                                                            Register obj,
-                                                            uint32_t offset,
-                                                            vixl::aarch64::Label* fixup_label,
-                                                            bool requires_read_barrier) {
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
+    HInstruction* instruction,
+    Location root,
+    Register obj,
+    uint32_t offset,
+    vixl::aarch64::Label* fixup_label,
+    ReadBarrierOption read_barrier_option) {
   DCHECK(fixup_label == nullptr || offset == 0u);
   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used:
       //
       //   root = obj.field;
-      //   if (Thread::Current()->GetIsGcMarking()) {
-      //     root = ReadBarrier::Mark(root)
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
       //   }
 
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -5304,16 +5468,22 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
-      SlowPathCodeARM64* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
-      codegen_->AddSlowPath(slow_path);
+      Register temp = lr;
 
-      MacroAssembler* masm = GetVIXLAssembler();
-      UseScratchRegisterScope temps(masm);
-      Register temp = temps.AcquireW();
-      // temp = Thread::Current()->GetIsGcMarking()
-      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+      // Slow path marking the GC root `root`. The entrypoint will already be loaded in temp.
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction,
+                                                                    root,
+                                                                    LocationFrom(temp));
+      codegen_->AddSlowPath(slow_path);
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ Ldr(temp, MemOperand(tr, entry_point_offset));
+      // The entrypoint is null when the GC is not marking; this saves one load compared to
+      // checking GetIsGcMarking.
       __ Cbnz(temp, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {
@@ -5426,7 +5596,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -5517,12 +5687,11 @@
   }
   AddSlowPath(slow_path);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -5613,7 +5782,19 @@
   }
 }
 
-
+void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+  for (const auto& entry : jit_string_patches_) {
+    const auto& it = jit_string_roots_.find(entry.first);
+    DCHECK(it != jit_string_roots_.end());
+    size_t index_in_table = it->second;
+    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
+    uint32_t literal_offset = literal->GetOffset();
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+    uint8_t* data = code + literal_offset;
+    reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+  }
+}
 
 #undef __
 #undef QUICK_ENTRY_POINT
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7f54b4b..868c8b0 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -20,6 +20,7 @@
 #include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "code_generator.h"
 #include "common_arm64.h"
+#include "dex_file_types.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -269,7 +270,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -284,18 +286,19 @@
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location maybe_temp);
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
   //
-  // while honoring read barriers if `requires_read_barrier` is true.
+  // while honoring read barriers based on read_barrier_option.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                vixl::aarch64::Register obj,
                                uint32_t offset,
                                vixl::aarch64::Label* fixup_label,
-                               bool requires_read_barrier);
+                               ReadBarrierOption read_barrier_option);
 
   // Generate a floating-point comparison.
   void GenerateFcmp(HInstruction* instruction);
@@ -545,7 +548,7 @@
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
   vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
-                                               uint32_t type_index,
+                                               dex::TypeIndex type_index,
                                                vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new PC-relative dex cache array patch for an instruction and return
@@ -557,12 +560,15 @@
       uint32_t element_offset,
       vixl::aarch64::Label* adrp_label = nullptr);
 
-  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                                      uint32_t string_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(
+      const DexFile& dex_file,
+      dex::StringIndex string_index);
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                                    uint32_t type_index);
+                                                                    dex::TypeIndex type_index);
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
   vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
+                                                                dex::StringIndex string_index);
 
   void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
   void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -574,6 +580,8 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -671,9 +679,9 @@
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
                                           vixl::aarch64::Literal<uint64_t>*,
                                           MethodReferenceComparator>;
-  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
-                                              vixl::aarch64::Literal<uint32_t>*,
-                                              StringReferenceValueComparator>;
+  using StringToLiteralMap = ArenaSafeMap<StringReference,
+                                          vixl::aarch64::Literal<uint32_t>*,
+                                          StringReferenceValueComparator>;
   using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
                                             vixl::aarch64::Literal<uint32_t>*,
                                             TypeReferenceValueComparator>;
@@ -737,7 +745,7 @@
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
-  BootStringToLiteralMap boot_image_string_patches_;
+  StringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
@@ -747,6 +755,9 @@
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
+  // Patches for string literals in JIT compiled code.
+  StringToLiteralMap jit_string_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index f1d1135..1ca439e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -23,6 +23,7 @@
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "intrinsics_arm_vixl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -39,13 +40,14 @@
 
 using helpers::DRegisterFrom;
 using helpers::DWARFReg;
-using helpers::FromLowSToD;
 using helpers::HighDRegisterFrom;
 using helpers::HighRegisterFrom;
 using helpers::InputOperandAt;
+using helpers::InputRegister;
 using helpers::InputRegisterAt;
 using helpers::InputSRegisterAt;
 using helpers::InputVRegisterAt;
+using helpers::Int32ConstantFrom;
 using helpers::LocationFrom;
 using helpers::LowRegisterFrom;
 using helpers::LowSRegisterFrom;
@@ -62,7 +64,9 @@
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }
 
+static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
@@ -129,7 +133,7 @@
       vixl32::Register base = sp;
       if (stack_offset != 0) {
         base = temps.Acquire();
-        __ Add(base, sp, stack_offset);
+        __ Add(base, sp, Operand::From(stack_offset));
       }
       __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
     }
@@ -177,7 +181,7 @@
       vixl32::Register base = sp;
       if (stack_offset != 0) {
         base = temps.Acquire();
-        __ Add(base, sp, stack_offset);
+        __ Add(base, sp, Operand::From(stack_offset));
       }
       __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
     }
@@ -340,6 +344,46 @@
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
 };
 
+class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
+      : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+
+    __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    codegen->EmitParallelMoves(
+        locations->InAt(0),
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimInt,
+        locations->InAt(1),
+        LocationFrom(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt);
+    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? kQuickThrowStringBounds
+        : kQuickThrowArrayBounds;
+    arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
+  }
+
+  bool IsFatal() const OVERRIDE { return true; }
+
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
+};
+
 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
   LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
@@ -355,7 +399,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConventionARMVIXL calling_convention;
-    __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+    __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
     arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
@@ -394,6 +438,125 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
 };
 
+class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+
+    if (!is_fatal_) {
+      SaveLiveRegisters(codegen, locations);
+    }
+
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               LocationFrom(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               LocationFrom(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
+    if (instruction_->IsInstanceOf()) {
+      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
+      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
+    }
+
+    if (!is_fatal_) {
+      RestoreLiveRegisters(codegen, locations);
+      __ B(GetExitLabel());
+    }
+  }
+
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; }
+
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
+ private:
+  const bool is_fatal_;
+
+  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
+};
+
+class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
+      : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+    arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+  }
+
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
+};
+
+class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        LocationFrom(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        LocationFrom(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
+};
+
+
 inline vixl32::Condition ARMCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return eq;
@@ -467,6 +630,11 @@
   return mask;
 }
 
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index);  // From the stack.
+  return kArmWordSize;  // Size reported to the caller for the restored S register.
+}
+
 #undef __
 
 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
@@ -482,6 +650,7 @@
                     compiler_options,
                     stats),
       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -500,9 +669,53 @@
   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
 }
 
-#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->
+void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
+  // Binds table_start_ and places one literal per switch entry right after it.
+  auto* const masm = codegen->GetVIXLAssembler();
+  const uint32_t entry_count = switch_instr_->GetNumEntries();
+  DCHECK_GE(entry_count, kPackedSwitchCompareJumpThreshold);
+
+  // The literals are placed directly through the assembler, so make sure the code buffer has
+  // room for one 32-bit word per entry for as long as the scope below is alive.
+  const size_t table_size = entry_count * sizeof(int32_t);
+  AssemblerAccurateScope aas(masm, table_size, CodeBufferCheckScope::kMaximumSize);
+  // TODO(VIXL): Check that using lower case bind is fine here.
+  masm->bind(&table_start_);
+  for (uint32_t index = 0; index != entry_count; ++index) {
+    masm->place(bb_addresses_[index].get());
+  }
+}
+
+void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
+  // Rewrites each placed literal with the offset of its successor block from table_start_.
+  const uint32_t entry_count = switch_instr_->GetNumEntries();
+  DCHECK_GE(entry_count, kPackedSwitchCompareJumpThreshold);
+
+  auto* const masm = codegen->GetVIXLAssembler();
+  const ArenaVector<HBasicBlock*>& targets = switch_instr_->GetBlock()->GetSuccessors();
+  for (uint32_t index = 0; index != entry_count; ++index) {
+    vixl32::Label* label = codegen->GetLabelOf(targets[index]);
+    DCHECK(label->IsBound());
+    int32_t offset = label->GetLocation() - table_start_.GetLocation();
+    // A BX target address must have its lowest bit set to 1 in T32.
+    offset += masm->IsUsingT32() ? 1 : 0;
+    DCHECK_GT(offset, std::numeric_limits<int32_t>::min());
+    DCHECK_LE(offset, std::numeric_limits<int32_t>::max());
+
+    bb_addresses_[index].get()->UpdateValue(offset, masm->GetBuffer());
+  }
+}
+
+void CodeGeneratorARMVIXL::FixJumpTables() {  // Patches each table held in jump_tables_.
+  for (auto it = jump_tables_.begin(); it != jump_tables_.end(); ++it) {
+    (*it)->FixTable(this);
+  }
+}
+
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
 
 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
+  FixJumpTables();  // Patch jump table entries before the assembler finalizes the code.
   GetAssembler()->FinalizeCode();
   CodeGenerator::Finalize(allocator);
 }
@@ -682,9 +895,9 @@
   }
 }
 
-void CodeGeneratorARMVIXL::MoveConstant(Location destination ATTRIBUTE_UNUSED,
-                                        int32_t value ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
+  DCHECK(location.IsRegister());  // Only core-register destinations are handled here.
+  __ Mov(RegisterFrom(location), value);
 }
 
 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
@@ -695,9 +908,15 @@
   GetMoveResolver()->EmitNativeCode(&move);
 }
 
-void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location ATTRIBUTE_UNUSED,
-                                             LocationSummary* locations ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegisterPair()) {  // A pair contributes its low half, then its high half.
+    locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
+    locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
+  } else if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
 }
 
 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -795,14 +1014,14 @@
       __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
     } else {
       DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(F64, FromLowSToD(LowSRegisterFrom(lhs_loc)), 0.0);
+      __ Vcmp(F64, DRegisterFrom(lhs_loc), 0.0);
     }
   } else {
     if (type == Primitive::kPrimFloat) {
       __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
     } else {
       DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(FromLowSToD(LowSRegisterFrom(lhs_loc)), FromLowSToD(LowSRegisterFrom(rhs_loc)));
+      __ Vcmp(DRegisterFrom(lhs_loc), DRegisterFrom(rhs_loc));
     }
   }
 }
@@ -934,7 +1153,8 @@
 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
                                                             size_t condition_input_index,
                                                             vixl32::Label* true_target,
-                                                            vixl32::Label* false_target) {
+                                                            vixl32::Label* false_target,
+                                                            bool far_target) {
   HInstruction* cond = instruction->InputAt(condition_input_index);
 
   if (true_target == nullptr && false_target == nullptr) {
@@ -970,9 +1190,13 @@
       DCHECK(cond_val.IsRegister());
     }
     if (true_target == nullptr) {
-      __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
+      __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
+                                false_target,
+                                far_target);
     } else {
-      __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
+      __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
+                                   true_target,
+                                   far_target);
     }
   } else {
     // Condition has not been materialized. Use its inputs as the comparison and
@@ -1028,6 +1252,24 @@
   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
+void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
+  LocationSummary* summary = new (GetGraph()->GetArena()) LocationSummary(
+      deoptimize, LocationSummary::kCallOnSlowPath);
+  summary->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
+    summary->SetInAt(0, Location::RequiresRegister());  // Condition is read from a register.
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
+  // Tests input 0 with the entry of a new deoptimization slow path as the only (true) target.
+  SlowPathCodeARMVIXL* deopt_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
+  vixl32::Label* const on_true = deopt_path->GetEntryLabel();
+  GenerateTestAndBranch(
+      deoptimize, /* condition_input_index */ 0, on_true, /* false_target */ nullptr);
+}
+
 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -1049,11 +1291,20 @@
   GenerateTestAndBranch(select,
                         /* condition_input_index */ 2,
                         /* true_target */ nullptr,
-                        &false_target);
+                        &false_target,
+                        /* far_target */ false);
   codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
   __ Bind(&false_target);
 }
 
+void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);  // No inputs or output are configured.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // Nothing to emit: CodeGenerator::Compile already calls MaybeRecordNativeDebugInfo.
+}
+
 void CodeGeneratorARMVIXL::GenerateNop() {
   __ Nop();
 }
@@ -1071,11 +1322,10 @@
       }
       break;
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
@@ -1095,13 +1345,20 @@
     return;
   }
 
+  Location right = cond->GetLocations()->InAt(1);
   vixl32::Register out = OutputRegister(cond);
   vixl32::Label true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default: {
       // Integer case.
-      __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      if (right.IsRegister()) {
+        __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      } else {
+        DCHECK(right.IsConstant());
+        __ Cmp(InputRegisterAt(cond, 0),
+               CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      }
       AssemblerAccurateScope aas(GetVIXLAssembler(),
                                  kArmInstrMaxSizeInBytes * 3u,
                                  CodeBufferCheckScope::kMaximumSize);
@@ -1250,7 +1507,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
+    HFloatConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
 }
 
@@ -1260,7 +1518,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
+    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
 }
 
@@ -1290,14 +1549,42 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
+  // The trampoline uses the same calling convention as the dex calling convention, except
+  // that arg0/r0 contains the method_idx instead of being loaded with the target Method*.
+  // Locations are therefore set up through the common HandleInvoke() path.
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
+  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);  // Fully delegated to codegen_.
+}
+
 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   // Explicit clinit checks triggered by static invokes must have been pruned by
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO(VIXL): TryDispatch
+  IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
+    return;
+  }
 
   HandleInvoke(invoke);
+
+  // TODO(VIXL): invoke->HasPcRelativeDexCache()
+}
+
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
+  // Returns true iff `invoke` is marked intrinsified, after dispatching its intrinsic codegen.
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return false;
+  }
+  IntrinsicCodeGeneratorARMVIXL(codegen).Dispatch(invoke);
+  return true;
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -1305,7 +1592,9 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO(VIXL): TryGenerateIntrinsicCode
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
 
   LocationSummary* locations = invoke->GetLocations();
   DCHECK(locations->HasTemps());
@@ -1321,13 +1610,18 @@
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO(VIXL): TryDispatch
+  IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
 
   HandleInvoke(invoke);
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO(VIXL): TryGenerateIntrinsicCode
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
@@ -1336,6 +1630,76 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
+  HandleInvoke(invoke);
+  // Add the hidden argument: r12 is appended as a temp (the code generator reads GetTemp(1)).
+  invoke->GetLocations()->AddTemp(LocationFrom(r12));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
+  LocationSummary* locations = invoke->GetLocations();
+  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register hidden_reg = RegisterFrom(locations->GetTemp(1));
+  Location receiver = locations->InAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  // Loads receiver class -> IMT -> IMT entry -> entry point (into LR), sets r12, then BLX LR.
+  DCHECK(!receiver.IsStackSlot());
+
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  GetAssembler()->LoadFromOffset(kLoadWord, temp, RegisterFrom(receiver), class_offset);
+
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
+  GetAssembler()->MaybeUnpoisonHeapReference(temp);
+  GetAssembler()->LoadFromOffset(kLoadWord,
+                                 temp,
+                                 temp,
+                                 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kArmPointerSize));
+  // temp = temp->GetImtEntryAt(method_offset);
+  GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
+  uint32_t entry_point =
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
+  // LR = temp->GetEntryPoint();
+  GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
+
+  // Set the hidden (in r12) argument. It is done here, right before a BLX, to prevent other
+  // instructions from clobbering it, as they might use r12 as a scratch register.
+  DCHECK(hidden_reg.Is(r12));
+
+  {
+    // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
+    // so it checks if the application is using them (by passing them to the macro assembler
+    // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
+    // what is available, and is the opposite of the standard usage: Instead of requesting a
+    // temporary location, it imposes an external constraint (i.e. a specific register is reserved
+    // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
+    // (to materialize the constant), since the destination register becomes available for such use
+    // internally for the duration of the macro instruction.
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    temps.Exclude(hidden_reg);
+    __ Mov(hidden_reg, invoke->GetDexMethodIndex());
+  }
+
+  {
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               kArmInstrMaxSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    // Call the entry point held in LR.
+    __ blx(lr);
+    DCHECK(!codegen_->IsLeafMethod());
+    codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
@@ -1387,6 +1751,8 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
+      // TODO(VIXL): Consider introducing an InputVRegister()
+      // helper function (equivalent to InputRegister()).
       __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0));
       break;
 
@@ -1685,7 +2051,7 @@
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-int' instruction.
           vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
-          __ Vcvt(I32, F32, temp, InputSRegisterAt(conversion, 0));
+          __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
           __ Vmov(OutputRegister(conversion), temp);
           break;
         }
@@ -1693,7 +2059,7 @@
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-int' instruction.
           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
-          __ Vcvt(I32, F64, temp_s, FromLowSToD(LowSRegisterFrom(in)));
+          __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
           __ Vmov(OutputRegister(conversion), temp_s);
           break;
         }
@@ -1769,7 +2135,7 @@
         case Primitive::kPrimChar: {
           // Processing a Dex `int-to-float' instruction.
           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
-          __ Vcvt(F32, I32, OutputSRegister(conversion), OutputSRegister(conversion));
+          __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
           break;
         }
 
@@ -1781,7 +2147,7 @@
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
-          __ Vcvt(F32, F64, OutputSRegister(conversion), FromLowSToD(LowSRegisterFrom(in)));
+          __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
           break;
 
         default:
@@ -1800,7 +2166,7 @@
         case Primitive::kPrimChar: {
           // Processing a Dex `int-to-double' instruction.
           __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
-          __ Vcvt(F64, I32, FromLowSToD(LowSRegisterFrom(out)), LowSRegisterFrom(out));
+          __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
           break;
         }
 
@@ -1808,19 +2174,15 @@
           // Processing a Dex `long-to-double' instruction.
           vixl32::Register low = LowRegisterFrom(in);
           vixl32::Register high = HighRegisterFrom(in);
-
           vixl32::SRegister out_s = LowSRegisterFrom(out);
-          vixl32::DRegister out_d = FromLowSToD(out_s);
-
+          vixl32::DRegister out_d = DRegisterFrom(out);
           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
-          vixl32::DRegister temp_d = FromLowSToD(temp_s);
-
-          vixl32::SRegister constant_s = LowSRegisterFrom(locations->GetTemp(1));
-          vixl32::DRegister constant_d = FromLowSToD(constant_s);
+          vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
+          vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
 
           // temp_d = int-to-double(high)
           __ Vmov(temp_s, high);
-          __ Vcvt(F64, I32, temp_d, temp_s);
+          __ Vcvt(F64, S32, temp_d, temp_s);
           // constant_d = k2Pow32EncodingForDouble
           __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
           // out_d = unsigned-to-double(low)
@@ -1833,7 +2195,7 @@
 
         case Primitive::kPrimFloat:
           // Processing a Dex `float-to-double' instruction.
-          __ Vcvt(F64, F32, FromLowSToD(LowSRegisterFrom(out)), InputSRegisterAt(conversion, 0));
+          __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
           break;
 
         default:
@@ -1859,10 +2221,9 @@
       break;
     }
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1892,11 +2253,15 @@
       }
       break;
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
-      __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
+        __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
+      }
       break;
     }
 
@@ -1921,10 +2286,9 @@
       break;
     }
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1951,11 +2315,15 @@
       break;
     }
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
-      DCHECK(second.IsRegisterPair());
-      __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
-      __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
+      if (second.IsConstant()) {
+        uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+        GenerateAddLongConst(out, first, -value);
+      } else {
+        DCHECK(second.IsRegisterPair());
+        __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
+        __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
+      }
       break;
     }
 
@@ -2116,13 +2484,14 @@
   vixl32::Register dividend = InputRegisterAt(instruction, 0);
   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
-  int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  int32_t imm = Int32ConstantFrom(second);
 
   int64_t magic;
   int shift;
   CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
 
-  __ Mov(temp1, magic);
+  // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
+  __ Mov(temp1, static_cast<int32_t>(magic));
   __ Smull(temp2, temp1, dividend, temp1);
 
   if (imm > 0 && magic < 0) {
@@ -2201,12 +2570,22 @@
         locations->SetInAt(1, Location::RequiresRegister());
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       } else {
-        TODO_VIXL32(FATAL);
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+        locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+        // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but
+        //       we only need the former.
+        locations->SetOut(LocationFrom(r0));
       }
       break;
     }
     case Primitive::kPrimLong: {
-      TODO_VIXL32(FATAL);
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      locations->SetOut(LocationFrom(r0, r1));
       break;
     }
     case Primitive::kPrimFloat:
@@ -2223,6 +2602,7 @@
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
+  Location lhs = div->GetLocations()->InAt(0);
   Location rhs = div->GetLocations()->InAt(1);
 
   switch (div->GetResultType()) {
@@ -2232,13 +2612,28 @@
       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
       } else {
-        TODO_VIXL32(FATAL);
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
+        DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
+        DCHECK(r0.Is(OutputRegister(div)));
+
+        codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      TODO_VIXL32(FATAL);
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
+      DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
+      DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
+      DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
+      DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
+      DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
+
+      codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
@@ -2252,16 +2647,143 @@
   }
 }
 
-void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
+void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
+  const Primitive::Type type = rem->GetResultType();
+  HInstruction* const divisor = rem->InputAt(1);
+
+  // Most remainders are implemented in the runtime. An int remainder needs no call when the
+  // divisor is a constant (sdiv is replaced by another instruction sequence) or when the CPU
+  // has a hardware divide instruction (the code generator then emits sdiv followed by mls).
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
+  const bool is_int = (type == Primitive::kPrimInt);
+  if (is_int && (divisor->IsConstant() ||
+                 codegen_->GetInstructionSetFeatures().HasDivideInstruction())) {
+    call_kind = LocationSummary::kNoCall;
   }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      if (divisor->IsConstant()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::ConstantLocation(divisor->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        const int32_t value = divisor->AsIntConstant()->GetValue();
+        // 0, 1 and -1 need no temp; other values need one, plus a second one unless
+        // AbsOrMin(value) is a power of two.
+        if (value != 1 && value != 0 && value != -1) {
+          locations->AddTemp(Location::RequiresRegister());
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
+            locations->AddTemp(Location::RequiresRegister());
+          }
+        }
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        locations->AddTemp(Location::RequiresRegister());
+      } else {
+        InvokeRuntimeCallingConventionARMVIXL convention;
+        locations->SetInAt(0, LocationFrom(convention.GetRegisterAt(0)));
+        locations->SetInAt(1, LocationFrom(convention.GetRegisterAt(1)));
+        // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but
+        //       we only need the latter.
+        locations->SetOut(LocationFrom(r1));
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConventionARMVIXL convention;
+      locations->SetInAt(
+          0, LocationFrom(convention.GetRegisterAt(0), convention.GetRegisterAt(1)));
+      locations->SetInAt(
+          1, LocationFrom(convention.GetRegisterAt(2), convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R2,R3.
+      locations->SetOut(LocationFrom(r2, r3));
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      InvokeRuntimeCallingConventionARMVIXL convention;
+      locations->SetInAt(0, LocationFrom(convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, LocationFrom(convention.GetFpuRegisterAt(1)));
+      locations->SetOut(LocationFrom(s0));
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConventionARMVIXL convention;
+      locations->SetInAt(
+          0, LocationFrom(convention.GetFpuRegisterAt(0), convention.GetFpuRegisterAt(1)));
+      locations->SetInAt(
+          1, LocationFrom(convention.GetFpuRegisterAt(2), convention.GetFpuRegisterAt(3)));
+      locations->SetOut(LocationFrom(s0, s1));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
+  // Int remainders are expanded inline when the divisor is constant or sdiv is available; every
+  // other case calls a runtime entrypoint with operands already placed by the locations builder.
+  LocationSummary* locations = rem->GetLocations();
+  Location second = locations->InAt(1);
+
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      vixl32::Register reg1 = InputRegisterAt(rem, 0);
+      vixl32::Register out_reg = OutputRegister(rem);
+      if (second.IsConstant()) {
+        GenerateDivRemConstantIntegral(rem);
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        vixl32::Register reg2 = RegisterFrom(second);
+        vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+        // temp = reg1 / reg2  (integer division)
+        // dest = reg1 - temp * reg2
+        __ Sdiv(temp, reg1, reg2);
+        __ Mls(out_reg, temp, reg2, reg1);
+      } else {
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
+        DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
+        DCHECK(out_reg.Is(r1));
+        codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+
+void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  LocationSummary* summary = codegen_->CreateThrowingSlowPathLocations(instruction);
+  summary->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));  // Checked value.
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -2279,7 +2801,7 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
-        __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+        __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
       } else {
         DCHECK(value.IsConstant()) << value;
         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
@@ -2355,9 +2877,9 @@
     }
     // Rotate, or mov to out for zero or word size rotations.
     if (rot != 0u) {
-      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
       __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
-      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
       __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
     } else {
       __ Mov(out_reg_lo, in_reg_lo);
@@ -2372,7 +2894,7 @@
     __ And(shift_right, RegisterFrom(rhs), 0x1F);
     __ Lsrs(shift_left, RegisterFrom(rhs), 6);
     // TODO(VIXL): Check that flags are kept after "vixl32::LeaveFlags" enabled.
-    __ Rsb(shift_left, shift_right, kArmBitsPerWord);
+    __ Rsb(shift_left, shift_right, Operand::From(kArmBitsPerWord));
     __ B(cc, &shift_by_32_plus_shift_right);
 
     // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
@@ -2538,11 +3060,11 @@
           // Shift the high part
           __ Lsl(o_h, high, o_l);
           // Shift the low part and `or` what overflew on the high part
-          __ Rsb(temp, o_l, kArmBitsPerWord);
+          __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
           __ Lsr(temp, low, temp);
           __ Orr(o_h, o_h, temp);
           // If the shift is > 32 bits, override the high part
-          __ Subs(temp, o_l, kArmBitsPerWord);
+          __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
           {
             AssemblerAccurateScope guard(GetVIXLAssembler(),
                                          3 * kArmInstrMaxSizeInBytes,
@@ -2557,11 +3079,11 @@
           // Shift the low part
           __ Lsr(o_l, low, o_h);
           // Shift the high part and `or` what underflew on the low part
-          __ Rsb(temp, o_h, kArmBitsPerWord);
+          __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
           __ Lsl(temp, high, temp);
           __ Orr(o_l, o_l, temp);
           // If the shift is > 32 bits, override the low part
-          __ Subs(temp, o_h, kArmBitsPerWord);
+          __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
           {
             AssemblerAccurateScope guard(GetVIXLAssembler(),
                                          3 * kArmInstrMaxSizeInBytes,
@@ -2575,10 +3097,10 @@
           __ And(o_h, second_reg, kMaxLongShiftDistance);
           // same as Shr except we use `Lsr`s and not `Asr`s
           __ Lsr(o_l, low, o_h);
-          __ Rsb(temp, o_h, kArmBitsPerWord);
+          __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
           __ Lsl(temp, high, temp);
           __ Orr(o_l, o_l, temp);
-          __ Subs(temp, o_h, kArmBitsPerWord);
+          __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
           {
             AssemblerAccurateScope guard(GetVIXLAssembler(),
                                          3 * kArmInstrMaxSizeInBytes,
@@ -2721,7 +3243,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
-  __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex().index_);
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
@@ -2782,6 +3304,17 @@
   }
 }
 
+void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
+  // Boolean negation needs no call: one register input, one non-overlapping register output.
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
+  __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);  // out = in ^ 1: flips bit 0.
+}
+
 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
@@ -2911,7 +3444,7 @@
     __ Add(temp, addr, offset);
     addr = temp;
   }
-  __ Ldrexd(out_lo, out_hi, addr);
+  __ Ldrexd(out_lo, out_hi, MemOperand(addr));
 }
 
 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
@@ -2931,10 +3464,10 @@
   __ Bind(&fail);
   // We need a load followed by store. (The address used in a STREX instruction must
   // be the same as the address in the most recently executed LDREX instruction.)
-  __ Ldrexd(temp1, temp2, addr);
+  __ Ldrexd(temp1, temp2, MemOperand(addr));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
-  __ Strexd(temp1, value_lo, value_hi, addr);
-  __ Cbnz(temp1, &fail);
+  __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
+  __ CompareAndBranchIfNonZero(temp1, &fail);
 }
 
 void LocationsBuilderARMVIXL::HandleFieldSet(
@@ -3053,7 +3586,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      vixl32::DRegister value_reg = FromLowSToD(LowSRegisterFrom(value));
+      vixl32::DRegister value_reg = DRegisterFrom(value);
       if (is_volatile && !atomic_ldrd_strd) {
         vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
         vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
@@ -3281,7 +3814,7 @@
       break;
 
     case Primitive::kPrimDouble: {
-      vixl32::DRegister out_dreg = FromLowSToD(LowSRegisterFrom(out));
+      vixl32::DRegister out_dreg = DRegisterFrom(out);
       if (is_volatile && !atomic_ldrd_strd) {
         vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
         vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
@@ -3345,16 +3878,85 @@
   HandleFieldGet(instruction, instruction->GetFieldInfo());
 }
 
+void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());  // Delegates to the field-set helper.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;  // Input to the location helper.
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  // Hand the instance field read over to the code generator's unresolved-field
+  // helper, passing the field's type, index and dex pc with the convention.
+  FieldAccessCallingConventionARMVIXL convention;
+  codegen_->GenerateUnresolvedFieldAccess(
+      instruction, instruction->GetFieldType(), instruction->GetFieldIndex(),
+      instruction->GetDexPc(), convention);
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;  // Input to the location helper.
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  // Hand the instance field write over to the code generator's unresolved-field
+  // helper, passing the field's type, index and dex pc with the convention.
+  FieldAccessCallingConventionARMVIXL convention;
+  codegen_->GenerateUnresolvedFieldAccess(
+      instruction, instruction->GetFieldType(), instruction->GetFieldIndex(),
+      instruction->GetDexPc(), convention);
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;  // Input to the location helper.
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  // Hand the static field read over to the code generator's unresolved-field
+  // helper, passing the field's type, index and dex pc with the convention.
+  FieldAccessCallingConventionARMVIXL convention;
+  codegen_->GenerateUnresolvedFieldAccess(
+      instruction, instruction->GetFieldType(), instruction->GetFieldIndex(),
+      instruction->GetDexPc(), convention);
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;  // Input to the location helper.
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  // Hand the static field write over to the code generator's unresolved-field
+  // helper, passing the field's type, index and dex pc with the convention.
+  FieldAccessCallingConventionARMVIXL convention;
+  codegen_->GenerateUnresolvedFieldAccess(
+      instruction, instruction->GetFieldType(), instruction->GetFieldIndex(),
+      instruction->GetDexPc(), convention);
+}
+
 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
 }
 
 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -3374,13 +3976,625 @@
   NullCheckSlowPathARMVIXL* slow_path =
       new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction);
   AddSlowPath(slow_path);
-  __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+  __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
   codegen_->GenerateNullCheck(instruction);
 }
 
+// Maps a primitive type to the operand type expected by LoadFromOffset().
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      return kLoadUnsignedByte;
+    case Primitive::kPrimByte:
+      return kLoadSignedByte;
+    case Primitive::kPrimChar:
+      return kLoadUnsignedHalfword;
+    case Primitive::kPrimShort:
+      return kLoadSignedHalfword;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      return kLoadWord;
+    case Primitive::kPrimLong:
+      return kLoadWordPair;
+    case Primitive::kPrimFloat:
+      return kLoadSWord;
+    case Primitive::kPrimDouble:
+      return kLoadDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+// Maps a primitive type to the operand type expected by StoreToOffset().
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      return kStoreByte;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return kStoreHalfword;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      return kStoreWord;
+    case Primitive::kPrimLong:
+      return kStoreWordPair;
+    case Primitive::kPrimFloat:
+      return kStoreSWord;
+    case Primitive::kPrimDouble:
+      return kStoreDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(Primitive::Type type,
+                                                    Location out_loc,
+                                                    vixl32::Register base,
+                                                    vixl32::Register reg_index,
+                                                    vixl32::Condition cond) {
+  // Loads `out_loc` from [base + (reg_index << component size shift of `type`)].
+  uint32_t shift_amount = Primitive::ComponentSizeShift(type);
+  MemOperand address(base, reg_index, vixl32::LSL, shift_amount);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      __ Ldrb(cond, RegisterFrom(out_loc), address);
+      break;
+    case Primitive::kPrimByte:
+      __ Ldrsb(cond, RegisterFrom(out_loc), address);
+      break;
+    case Primitive::kPrimChar:
+      __ Ldrh(cond, RegisterFrom(out_loc), address);
+      break;
+    case Primitive::kPrimShort:
+      __ Ldrsh(cond, RegisterFrom(out_loc), address);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ Ldr(cond, RegisterFrom(out_loc), address);
+      break;
+    // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(Primitive::Type type,
+                                                   Location loc,
+                                                   vixl32::Register base,
+                                                   vixl32::Register reg_index,
+                                                   vixl32::Condition cond) {
+  // Stores `loc` to [base + (reg_index << component size shift of `type`)].
+  uint32_t shift_amount = Primitive::ComponentSizeShift(type);
+  MemOperand address(base, reg_index, vixl32::LSL, shift_amount);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      __ Strb(cond, RegisterFrom(loc), address);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      __ Strh(cond, RegisterFrom(loc), address);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ Str(cond, RegisterFrom(loc), address);
+      break;
+    // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
+  // Only a reference load with read barriers enabled may branch to a slow path.
+  const bool ref_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
+  LocationSummary::CallKind call_kind = ref_get_with_read_barrier
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* summary = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  if (ref_get_with_read_barrier && kUseBakerReadBarrier) {
+    TODO_VIXL32(FATAL);
+  }
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+    summary->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    // For an object array get with read barriers the output must overlap
+    // the inputs: writing the result must not clobber the array's
+    // location, which is still needed to emit the read barrier.
+    summary->SetOut(
+        Location::RequiresRegister(),
+        ref_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  }
+  // A temporary register is needed by the read barrier marking slow path
+  // in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier, and by
+  // the String compression feature for String.charAt() accesses.
+  if ((ref_get_with_read_barrier && kUseBakerReadBarrier)
+      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+    summary->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);  // Array, or its data address.
+  Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  Primitive::Type type = instruction->GetType();
+  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+                                        instruction->IsStringCharAt();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      vixl32::Register length;
+      if (maybe_compressed_char_at) {
+        length = RegisterFrom(locations->GetTemp(0));
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+      if (index.IsConstant()) {
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        if (maybe_compressed_char_at) {
+          vixl32::Label uncompressed_load, done;
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ B(cs, &uncompressed_load);
+          GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
+                                         RegisterFrom(out_loc),
+                                         obj,
+                                         data_offset + const_index);
+          __ B(&done);
+          __ Bind(&uncompressed_load);
+          GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
+                                         RegisterFrom(out_loc),
+                                         obj,
+                                         data_offset + (const_index << 1));
+          __ Bind(&done);
+        } else {
+          uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+          LoadOperandType load_type = GetLoadOperandType(type);
+          GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
+        }
+      } else {
+        vixl32::Register scratch = temps.Acquire();
+        vixl32::Register temp = scratch;  // Base register; may be rebound to `obj` below.
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = obj;
+        } else {
+          __ Add(temp, obj, data_offset);
+        }
+        if (maybe_compressed_char_at) {
+          vixl32::Label uncompressed_load, done;
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ B(cs, &uncompressed_load);
+          __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
+          __ B(&done);
+          __ Bind(&uncompressed_load);
+          __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
+          __ Bind(&done);
+        } else {
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
+        }
+        temps.Release(scratch);  // Not `temp`: it may alias `obj`, which is not a scratch.
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        TODO_VIXL32(FATAL);
+      } else {
+        vixl32::Register out = OutputRegister(instruction);
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          vixl32::Register scratch = temps.Acquire();
+          vixl32::Register temp = scratch;  // Base register; may be rebound to `obj` below.
+          if (has_intermediate_address) {
+            // We do not need to compute the intermediate address from the array: the
+            // input instruction has done it already. See the comment in
+            // `TryExtractArrayAccessAddress()`.
+            if (kIsDebugBuild) {
+              HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+              DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+            }
+            temp = obj;
+          } else {
+            __ Add(temp, obj, data_offset);
+          }
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
+          temps.Release(scratch);  // Not `temp`: it may alias `obj`, which is not a scratch.
+
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      vixl32::SRegister out = SRegisterFrom(out_loc);
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        GetAssembler()->LoadSFromOffset(out, obj, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
+        GetAssembler()->LoadSFromOffset(out, temp, data_offset);
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+
+  if (type == Primitive::kPrimNot) {
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else if (!maybe_compressed_char_at) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
+  const Primitive::Type component_type = instruction->GetComponentType();
+  const bool write_barrier_needed =
+      CodeGenerator::StoreNeedsWriteBarrier(component_type, instruction->GetValue());
+
+  // A store that needs a type check may have to branch to a slow path.
+  LocationSummary::CallKind call_kind = instruction->NeedsTypeCheck()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* summary = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+
+  // Inputs: 0 = array, 1 = index (register or constant), 2 = value to store,
+  // the latter in an FPU register when the component type is floating point.
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  summary->SetInAt(2,
+                   Primitive::IsFloatingPointType(component_type)
+                       ? Location::RequiresFpuRegister()
+                       : Location::RequiresRegister());
+
+  if (write_barrier_needed) {
+    // Temporary registers for the write barrier.
+    summary->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    summary->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  LocationSummary* locations = instruction->GetLocations();
+  vixl32::Register array = InputRegisterAt(instruction, 0);
+  Location index = locations->InAt(1);
+  Primitive::Type value_type = instruction->GetComponentType();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t data_offset =
+      mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+  Location value_loc = locations->InAt(2);
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
+  switch (value_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      if (index.IsConstant()) {
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset =
+            data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+        StoreOperandType store_type = GetStoreOperandType(value_type);
+        GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+          }
+          temp = array;
+        } else {
+          __ Add(temp, array, data_offset);
+        }
+        codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      vixl32::Register value = RegisterFrom(value_loc);
+      // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
+      // See the comment in instruction_simplifier_shared.cc.
+      DCHECK(!has_intermediate_address);
+
+      if (instruction->InputAt(2)->IsNullConstant()) {
+        // Just setting null.
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
+        } else {
+          DCHECK(index.IsRegister()) << index;
+          vixl32::Register temp = temps.Acquire();
+          __ Add(temp, array, data_offset);
+          codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
+          temps.Release(temp);
+        }
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      Location temp1_loc = locations->GetTemp(0);
+      vixl32::Register temp1 = RegisterFrom(temp1_loc);
+      Location temp2_loc = locations->GetTemp(1);
+      vixl32::Register temp2 = RegisterFrom(temp2_loc);
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      vixl32::Label done;
+      SlowPathCodeARMVIXL* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARMVIXL(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          vixl32::Label non_zero;
+          __ CompareAndBranchIfNonZero(value, &non_zero);
+          if (index.IsConstant()) {
+            size_t offset =
+               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+            GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
+          } else {
+            DCHECK(index.IsRegister()) << index;
+            vixl32::Register temp = temps.Acquire();
+            __ Add(temp, array, data_offset);
+            codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
+            temps.Release(temp);
+          }
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ B(&done);
+          __ Bind(&non_zero);
+        }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
+
+        // /* HeapReference<Class> */ temp1 = array->klass_
+        GetAssembler()->LoadFromOffset(kLoadWord, temp1, array, class_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, no need to unpoison `temp1`
+        // nor `temp2`, as we are comparing two poisoned references.
+        __ Cmp(temp1, temp2);
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          vixl32::Label do_put;
+          __ B(eq, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ B(ne, slow_path->GetEntryLabel());
+        }
+      }
+
+      vixl32::Register source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        __ Mov(temp1, value);
+        GetAssembler()->PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
+      } else {
+        DCHECK(index.IsRegister()) << index;
+
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, data_offset);
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          LocationFrom(source),
+                                          temp,
+                                          RegisterFrom(index));
+        temps.Release(temp);
+      }
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull());
+
+      if (done.IsReferenced()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      Location value = locations->InAt(2);
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      Location value = locations->InAt(2);
+      DCHECK(value.IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
+        GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      Location value = locations->InAt(2);
+      DCHECK(value.IsFpuRegisterPair());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
+        temps.Release(temp);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << value_type;
+      UNREACHABLE();
+  }
+
+  // Objects are handled in the switch.
+  if (value_type != Primitive::kPrimNot) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -3394,7 +4608,58 @@
   vixl32::Register out = OutputRegister(instruction);
   GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/272625/
+  // Mask out compression flag from String's array length.
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    __ Lsr(out, out, 1u);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());  // Input 0 must be in a register.
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));  // The offset.
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  vixl32::Register out = OutputRegister(instruction);
+  vixl32::Register first = InputRegisterAt(instruction, 0);
+  Location second = instruction->GetLocations()->InAt(1);  // Register or constant; see below.
+
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+
+  if (second.IsRegister()) {  // Emit out = first + second, with `second` in a register...
+    __ Add(out, first, RegisterFrom(second));
+  } else {  // ...or with `second` read as an int constant value.
+    __ Add(out, first, second.GetConstant()->AsIntConstant()->GetValue());
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
+  RegisterSet caller_saves = RegisterSet::Empty();  // Gets runtime convention registers 0 and 1.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
+  locations->SetInAt(0, Location::RequiresRegister());  // Input 0: read as the index by codegen.
+  locations->SetInAt(1, Location::RequiresRegister());  // Input 1: read as the length by codegen.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
+  SlowPathCodeARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  vixl32::Register index = InputRegisterAt(instruction, 0);
+  vixl32::Register length = InputRegisterAt(instruction, 1);
+
+  __ Cmp(index, length);
+  __ B(hs, slow_path->GetEntryLabel());  // hs is unsigned >=, so negative indices branch too.
 }
 
 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
@@ -3404,11 +4669,11 @@
                                       bool can_be_null) {
   vixl32::Label is_null;
   if (can_be_null) {
-    __ Cbz(value, &is_null);
+    __ CompareAndBranchIfZero(value, &is_null);
   }
   GetAssembler()->LoadFromOffset(
       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
-  __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
+  __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
   __ Strb(card, MemOperand(card, temp));
   if (can_be_null) {
     __ Bind(&is_null);
@@ -3424,8 +4689,9 @@
 }
 
 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
-  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related.
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // Empty set: no caller-saves.
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -3463,10 +4729,10 @@
   GetAssembler()->LoadFromOffset(
       kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
   if (successor == nullptr) {
-    __ Cbnz(temp, slow_path->GetEntryLabel());
+    __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
   } else {
-    __ Cbz(temp, codegen_->GetLabelOf(successor));
+    __ CompareAndBranchIfZero(temp, codegen_->GetLabelOf(successor));
     __ B(slow_path->GetEntryLabel());
   }
 }
@@ -3509,7 +4775,7 @@
     }
   } else if (source.IsFpuRegister()) {
     if (destination.IsRegister()) {
-      TODO_VIXL32(FATAL);
+      __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
     } else if (destination.IsFpuRegister()) {
       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
     } else {
@@ -3534,9 +4800,7 @@
       __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
       __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
     } else if (destination.IsFpuRegisterPair()) {
-      __ Vmov(FromLowSToD(LowSRegisterFrom(destination)),
-              LowRegisterFrom(source),
-              HighRegisterFrom(source));
+      __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       DCHECK(ExpectedPairLayout(source));
@@ -3547,7 +4811,7 @@
     }
   } else if (source.IsFpuRegisterPair()) {
     if (destination.IsRegisterPair()) {
-      TODO_VIXL32(FATAL);
+      __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
     } else if (destination.IsFpuRegisterPair()) {
       __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
     } else {
@@ -3586,7 +4850,7 @@
     } else if (constant->IsDoubleConstant()) {
       double value = constant->AsDoubleConstant()->GetValue();
       if (destination.IsFpuRegisterPair()) {
-        __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), value);
+        __ Vmov(DRegisterFrom(destination), value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         uint64_t int_value = bit_cast<uint64_t, double>(value);
@@ -3652,9 +4916,12 @@
   } else if (source.IsStackSlot() && destination.IsRegister()) {
     Exchange(RegisterFrom(destination), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    TODO_VIXL32(FATAL);
+    Exchange(source.GetStackIndex(), destination.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
-    TODO_VIXL32(FATAL);
+    vixl32::SRegister temp = temps.AcquireS();
+    __ Vmov(temp, SRegisterFrom(source));
+    __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
+    __ Vmov(SRegisterFrom(destination), temp);
   } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
     vixl32::DRegister temp = temps.AcquireD();
     __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
@@ -3670,11 +4937,34 @@
     GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
     GetAssembler()->StoreDToOffset(temp, sp, mem);
   } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
-    TODO_VIXL32(FATAL);
+    vixl32::DRegister first = DRegisterFrom(source);
+    vixl32::DRegister second = DRegisterFrom(destination);
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, first);
+    __ Vmov(first, second);
+    __ Vmov(second, temp);
   } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
-    TODO_VIXL32(FATAL);
+    vixl32::DRegister reg = source.IsFpuRegisterPair()
+        ? DRegisterFrom(source)
+        : DRegisterFrom(destination);
+    int mem = source.IsFpuRegisterPair()
+        ? destination.GetStackIndex()
+        : source.GetStackIndex();
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, reg);
+    GetAssembler()->LoadDFromOffset(reg, sp, mem);
+    GetAssembler()->StoreDToOffset(temp, sp, mem);
   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
-    TODO_VIXL32(FATAL);
+    vixl32::SRegister reg = source.IsFpuRegister()
+        ? SRegisterFrom(source)
+        : SRegisterFrom(destination);
+    int mem = source.IsFpuRegister()
+        ? destination.GetStackIndex()
+        : source.GetStackIndex();
+    vixl32::Register temp = temps.Acquire();
+    __ Vmov(temp, reg);
+    GetAssembler()->LoadSFromOffset(reg, sp, mem);
+    GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
     vixl32::DRegister temp1 = temps.AcquireD();
     vixl32::DRegister temp2 = temps.AcquireD();
@@ -3731,7 +5021,7 @@
 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -3763,7 +5053,7 @@
           ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value();
       GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
       // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
       GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier);
       generate_null_check = !cls->IsInDexCache();
       break;
@@ -3778,7 +5068,7 @@
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
     if (generate_null_check) {
-      __ Cbz(out, slow_path->GetEntryLabel());
+      __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
     }
     if (cls->MustGenerateClinitCheck()) {
       GenerateClassInitializationCheck(slow_path, out);
@@ -3859,7 +5149,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
-  __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -3903,6 +5193,509 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+// Returns 1 if the type check kind needs a temp (used for the read barrier), 0 otherwise.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+       (kUseBakerReadBarrier ||
+          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+          type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;  // Read barriers are emitted, with Baker barriers or one of the kinds above.
+  }
+  return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class, plus the instanceof temps.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    return 3;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);  // Yields 1 or 2 in total.
+}
+
+void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool baker_read_barrier_slow_path = false;  // Only set for the first group of kinds below.
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+      baker_read_barrier_slow_path = kUseBakerReadBarrier;
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;  // Unconditional for these kinds.
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  if (baker_read_barrier_slow_path) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::RequiresRegister());  // Input 0: read as `obj` by codegen.
+  locations->SetInAt(1, Location::RequiresRegister());  // Input 1: read as `cls` by codegen.
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARMVIXL uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));  // Zero or one temp.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(instruction);
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  vixl32::Label done, zero;  // `zero` sets `out` to 0 after the switch; `done` is the exit.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+
+  // Return 0 if `obj` is null.
+  // Avoid the null check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      __ Cmp(out, cls);
+      // Classes must be equal for the instanceof to succeed.
+      __ B(ne, &zero);
+      __ Mov(out, 1);  // Exact match: the result is 1.
+      __ B(&done);
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+      __ Cmp(out, cls);
+      __ B(ne, &loop);
+      __ Mov(out, 1);
+      if (zero.IsReferenced()) {  // Jump over the `zero` block emitted after the switch.
+        __ B(&done);
+      }
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Walk over the class hierarchy to find a match.
+      vixl32::Label loop, success;
+      __ Bind(&loop);
+      __ Cmp(out, cls);
+      __ B(eq, &success);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      __ CompareAndBranchIfNonZero(out, &loop);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ B(&done);
+      __ Bind(&success);
+      __ Mov(out, 1);
+      if (zero.IsReferenced()) {  // Jump over the `zero` block emitted after the switch.
+        __ B(&done);
+      }
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Do an exact check.
+      vixl32::Label exact_check;
+      __ Cmp(out, cls);
+      __ B(eq, &exact_check);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);  // Not kPrimNot: result 0.
+      __ Bind(&exact_check);
+      __ Mov(out, 1);
+      __ B(&done);
+      break;
+    }
+
+    case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+      __ Cmp(out, cls);
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                                        /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(ne, slow_path->GetEntryLabel());  // Classes differ: defer to the slow path.
+      __ Mov(out, 1);
+      if (zero.IsReferenced()) {  // Jump over the `zero` block emitted after the switch.
+        __ B(&done);
+      }
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require to assign fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                                        /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      if (zero.IsReferenced()) {
+        __ B(&done);
+      }
+      break;
+    }
+  }
+
+  if (zero.IsReferenced()) {  // Emitted only if some branch above targets `zero`.
+    __ Bind(&zero);
+    __ Mov(out, 0);
+  }
+
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
+
+  if (slow_path != nullptr) {  // Bind the slow path's exit label at the very end.
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;  // Unconditional for these kinds.
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());  // Input 0: read as `obj` by codegen.
+  locations->SetInAt(1, Location::RequiresRegister());  // Input 1: read as `cls` by codegen.
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));  // 3 for interface checks.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  Location temp_loc = locations->GetTemp(0);
+  vixl32::Register temp = RegisterFrom(temp_loc);
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 3u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
+  SlowPathCodeARMVIXL* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                            is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
+
+  vixl32::Label done;
+  // Avoid null check if we know obj is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      __ Cmp(temp, cls);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+
+      // Otherwise, compare the classes.
+      __ Cmp(temp, cls);
+      __ B(ne, &loop);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // Walk over the class hierarchy to find a match.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      __ Cmp(temp, cls);
+      __ B(eq, &done);
+
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, jump to the beginning of the loop.
+      __ B(&loop);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck:  {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // Do an exact check.
+      __ Cmp(temp, cls);
+      __ B(eq, &done);
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise,the object is indeed an array, jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive type.
+      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+      __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+      // We always go into the type check slow path for the unresolved check case.
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require to
+      // assign fixed registers for the inputs of this HInstanceOf
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+
+      __ B(type_check_slow_path->GetEntryLabel());
+      break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve performance of the fast path. We can not get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      vixl32::Label start_loop;
+      __ Bind(&start_loop);
+      __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
+                                type_check_slow_path->GetEntryLabel());
+      __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
+      GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
+      // Go to next interface.
+      __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
+      __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
+      __ B(ne, &start_loop);
+      break;
+    }
+  }
+  __ Bind(&done);
+
+  __ Bind(type_check_slow_path->GetExitLabel());
+}
+
+void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));  // Runtime arg reg 0.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
+  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+                          instruction,
+                          instruction->GetDexPc());
+  if (instruction->IsEnter()) {  // Presumably checks the signature of the entrypoint used above.
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
   HandleBitwiseOperation(instruction, AND);
 }
@@ -3938,6 +5731,70 @@
   HandleBitwiseOperation(instruction);
 }
 
+void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+         || instruction->GetResultType() == Primitive::kPrimLong);
+
+  locations->SetInAt(0, Location::RequiresRegister());  // Left operand, used as-is.
+  locations->SetInAt(1, Location::RequiresRegister());  // Right operand, the negated one.
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    vixl32::Register first_reg = RegisterFrom(first);
+    vixl32::Register second_reg = RegisterFrom(second);
+    vixl32::Register out_reg = RegisterFrom(out);
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ Bic(out_reg, first_reg, second_reg);  // out = first & ~second
+        break;
+      case HInstruction::kOr:
+        __ Orn(out_reg, first_reg, second_reg);  // out = first | ~second
+        break;
+      // There is no EON on arm, so a negated-right XOR is treated as unexpected here.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+    return;
+
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    vixl32::Register first_low = LowRegisterFrom(first);
+    vixl32::Register first_high = HighRegisterFrom(first);
+    vixl32::Register second_low = LowRegisterFrom(second);
+    vixl32::Register second_high = HighRegisterFrom(second);
+    vixl32::Register out_low = LowRegisterFrom(out);
+    vixl32::Register out_high = HighRegisterFrom(out);
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ Bic(out_low, first_low, second_low);  // Each 32-bit half is computed separately.
+        __ Bic(out_high, first_high, second_high);
+        break;
+      case HInstruction::kOr:
+        __ Orn(out_low, first_low, second_low);  // Each 32-bit half is computed separately.
+        __ Orn(out_high, first_high, second_high);
+        break;
+      // There is no EON on arm, so a negated-right XOR is treated as unexpected here.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+  }
+}
+
 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
                                                        vixl32::Register first,
@@ -3998,6 +5855,33 @@
   __ Eor(out, first, value);
 }
 
+void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
+                                                           Location first,
+                                                           uint64_t value) {
+  vixl32::Register out_low = LowRegisterFrom(out);
+  vixl32::Register out_high = HighRegisterFrom(out);
+  vixl32::Register first_low = LowRegisterFrom(first);
+  vixl32::Register first_high = HighRegisterFrom(first);
+  uint32_t value_low = Low32Bits(value);
+  uint32_t value_high = High32Bits(value);
+  if (value_low == 0u) {  // Adding 0 to the low word cannot carry: plain copy + high-word add.
+    if (!out_low.Is(first_low)) {
+      __ Mov(out_low, first_low);
+    }
+    __ Add(out_high, first_high, value_high);
+    return;
+  }
+  __ Adds(out_low, first_low, value_low);  // Sets the carry consumed by Adc/Sbc below.
+  if (GetAssembler()->ShifterOperandCanHold(ADC, value_high, kCcKeep)) {
+    __ Adc(out_high, first_high, value_high);
+  } else if (GetAssembler()->ShifterOperandCanHold(SBC, ~value_high, kCcKeep)) {
+    __ Sbc(out_high, first_high, ~value_high);  // Same sum: x - ~v - !carry == x + v + carry.
+  } else {
+    LOG(FATAL) << "Unexpected constant " << value_high;
+    UNREACHABLE();
+  }
+}
+
 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location first = locations->InAt(0);
@@ -4074,6 +5958,42 @@
   }
 }
 
+void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp ATTRIBUTE_UNUSED,
+    ReadBarrierOption read_barrier_option ATTRIBUTE_UNUSED) {
+  vixl32::Register out_reg = RegisterFrom(out);
+  if (kEmitCompilerReadBarrier) {
+    TODO_VIXL32(FATAL);  // Read-barrier variant is still a TODO for this backend.
+  } else {
+    // Plain load with no read barrier; `out` is both the base address and the destination.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp ATTRIBUTE_UNUSED,
+    ReadBarrierOption read_barrier_option ATTRIBUTE_UNUSED) {
+  vixl32::Register out_reg = RegisterFrom(out);
+  vixl32::Register obj_reg = RegisterFrom(obj);
+  if (kEmitCompilerReadBarrier) {
+    TODO_VIXL32(FATAL);  // Read-barrier variant is still a TODO for this backend.
+  } else {
+    // Plain load with no read barrier; `obj` supplies the base address, `out` the destination.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
 void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
     HInstruction* instruction ATTRIBUTE_UNUSED,
     Location root,
@@ -4092,6 +6012,39 @@
   }
 }
 
+void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location ref ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register obj ATTRIBUTE_UNUSED,
+    uint32_t offset ATTRIBUTE_UNUSED,
+    Location temp ATTRIBUTE_UNUSED,
+    bool needs_null_check ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location ref ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register obj ATTRIBUTE_UNUSED,
+    uint32_t offset ATTRIBUTE_UNUSED,
+    Location index ATTRIBUTE_UNUSED,
+    ScaleFactor scale_factor ATTRIBUTE_UNUSED,
+    Location temp ATTRIBUTE_UNUSED,
+    bool needs_null_check ATTRIBUTE_UNUSED,
+    bool always_update_field ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register* temp2 ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
+                                                   Location out ATTRIBUTE_UNUSED,
+                                                   Location ref ATTRIBUTE_UNUSED,
+                                                   Location obj ATTRIBUTE_UNUSED,
+                                                   uint32_t offset ATTRIBUTE_UNUSED,
+                                                   Location index ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
                                                         Location out,
                                                         Location ref ATTRIBUTE_UNUSED,
@@ -4165,7 +6118,10 @@
       if (current_method.IsRegister()) {
         method_reg = RegisterFrom(current_method);
       } else {
-        TODO_VIXL32(FATAL);
+        DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = temp_reg;
+        GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset);
       }
       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       GetAssembler()->LoadFromOffset(
@@ -4235,10 +6191,186 @@
   __ Blx(lr);
 }
 
+void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  vixl32::Register res = OutputRegister(instr);
+  vixl32::Register accumulator =
+      InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
+  vixl32::Register mul_left =
+      InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
+  vixl32::Register mul_right =
+      InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ Mla(res, mul_left, mul_right, accumulator);  // res = accumulator + mul_left * mul_right
+  } else {
+    __ Mls(res, mul_left, mul_right, accumulator);  // res = accumulator - mul_left * mul_right
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do: HBoundType should have been removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do: HBoundType should have been removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+// Packed switch: cascaded compare/jumps, or (T32 only, above the threshold) a table-based jump.
+void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
+      codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
+    locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
+    if (switch_instr->GetStartValue() != 0) {
+      locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
+    }
+  }
+}
+
+// TODO(VIXL): Investigate and reach the parity with old arm codegen.
+void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
+      !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
+    // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+    vixl32::Register temp_reg = temps.Acquire();
+    // Note: It is fine for the Adds() below to use the IP register to temporarily store the
+    // immediate, because IP is used as the destination register. For the other Adds() and the
+    // Cmp() below, the immediate values are constant, and they can be encoded in the
+    // instruction without making use of the IP register.
+    __ Adds(temp_reg, value_reg, -lower_bound);  // temp = value - lower_bound, sets flags.
+
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Adds(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp_reg, 1);
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ B(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    // Create a table lookup.
+    vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
+
+    JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
+
+    // Remove the bias.
+    vixl32::Register key_reg;
+    if (lower_bound != 0) {
+      key_reg = RegisterFrom(locations->GetTemp(1));
+      __ Sub(key_reg, value_reg, lower_bound);
+    } else {
+      key_reg = value_reg;
+    }
+
+    // Check whether the value is in the table, jump to default block if not.
+    __ Cmp(key_reg, num_entries - 1);
+    __ B(hi, codegen_->GetLabelOf(default_block));  // Unsigned: also rejects key < lower_bound.
+
+    UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+    vixl32::Register jump_offset = temps.Acquire();
+
+    // Load jump offset from the table.
+    __ Adr(table_base, jump_table->GetTableStartLabel());
+    __ Ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
+
+    // Jump to the target block by branching to table_base (pc-relative) + offset.
+    vixl32::Register target_address = table_base;  // Reuses the table_base register.
+    __ Add(target_address, table_base, jump_offset);
+    __ Bx(target_address);
+
+    jump_table->EmitTable(codegen_);
+  }
+}
+
 // Copy the result of a call into the given target.
-void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
-                                                  Primitive::Type type ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
+  if (!trg.IsValid()) {
+    DCHECK_EQ(type, Primitive::kPrimVoid);
+    return;  // No valid target (expected for void only): nothing to copy.
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type);
+  if (return_loc.Equals(trg)) {
+    return;  // The result is already in the target location.
+  }
+
+  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
+  //       with the last branch.
+  if (type == Primitive::kPrimLong) {
+    TODO_VIXL32(FATAL);  // Moving a long result is still a TODO for this backend.
+  } else if (type == Primitive::kPrimDouble) {
+    TODO_VIXL32(FATAL);  // Moving a double result is still a TODO for this backend.
+  } else {
+    // Let the parallel move resolver take care of all of this.
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc, trg, type, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArmPointerSize).SizeValue();
+    GetAssembler()->LoadFromOffset(kLoadWord,
+                                   OutputRegister(instruction),
+                                   InputRegisterAt(instruction, 0),
+                                   method_offset);
+  } else {  // Two loads: the pointer at ImtPtrOffset first, then the entry at method_offset.
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kArmPointerSize));
+    GetAssembler()->LoadFromOffset(kLoadWord,
+                                   OutputRegister(instruction),
+                                   InputRegisterAt(instruction, 0),
+                                   mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+    GetAssembler()->LoadFromOffset(kLoadWord,
+                                   OutputRegister(instruction),
+                                   OutputRegister(instruction),
+                                   method_offset);
+  }
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 02bf960..bd91127 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
 
 #include "code_generator_arm.h"
+#include "common_arm.h"
 #include "utils/arm/assembler_arm_vixl.h"
 
 // TODO(VIXL): make vixl clean wrt -Wshadow.
@@ -106,13 +107,22 @@
   M(AboveOrEqual)                               \
   M(Add)                                        \
   M(And)                                        \
+  M(ArrayGet)                                   \
   M(ArrayLength)                                \
+  M(ArraySet)                                   \
   M(Below)                                      \
   M(BelowOrEqual)                               \
+  M(BitwiseNegatedRight)                        \
+  M(BooleanNot)                                 \
+  M(BoundsCheck)                                \
+  M(BoundType)                                  \
+  M(CheckCast)                                  \
+  M(ClassTableGet)                              \
   M(ClearException)                             \
   M(ClinitCheck)                                \
   M(Compare)                                    \
   M(CurrentMethod)                              \
+  M(Deoptimize)                                 \
   M(Div)                                        \
   M(DivZeroCheck)                               \
   M(DoubleConstant)                             \
@@ -125,8 +135,12 @@
   M(If)                                         \
   M(InstanceFieldGet)                           \
   M(InstanceFieldSet)                           \
+  M(InstanceOf)                                 \
   M(IntConstant)                                \
+  M(IntermediateAddress)                        \
+  M(InvokeInterface)                            \
   M(InvokeStaticOrDirect)                       \
+  M(InvokeUnresolved)                           \
   M(InvokeVirtual)                              \
   M(LessThan)                                   \
   M(LessThanOrEqual)                            \
@@ -135,7 +149,10 @@
   M(LoadString)                                 \
   M(LongConstant)                               \
   M(MemoryBarrier)                              \
+  M(MonitorOperation)                           \
   M(Mul)                                        \
+  M(MultiplyAccumulate)                         \
+  M(NativeDebugInfo)                            \
   M(Neg)                                        \
   M(NewArray)                                   \
   M(NewInstance)                                \
@@ -144,9 +161,11 @@
   M(NullCheck)                                  \
   M(NullConstant)                               \
   M(Or)                                         \
+  M(PackedSwitch)                               \
   M(ParallelMove)                               \
   M(ParameterValue)                             \
   M(Phi)                                        \
+  M(Rem)                                        \
   M(Return)                                     \
   M(ReturnVoid)                                 \
   M(Ror)                                        \
@@ -154,39 +173,53 @@
   M(Shl)                                        \
   M(Shr)                                        \
   M(StaticFieldGet)                             \
+  M(StaticFieldSet)                             \
   M(Sub)                                        \
   M(SuspendCheck)                               \
   M(Throw)                                      \
   M(TryBoundary)                                \
   M(TypeConversion)                             \
+  M(UnresolvedInstanceFieldGet)                 \
+  M(UnresolvedInstanceFieldSet)                 \
+  M(UnresolvedStaticFieldGet)                   \
+  M(UnresolvedStaticFieldSet)                   \
   M(UShr)                                       \
   M(Xor)                                        \
 
 // TODO: Remove once the VIXL32 backend is implemented completely.
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)   \
-  M(ArrayGet)                                   \
-  M(ArraySet)                                   \
-  M(BooleanNot)                                 \
-  M(BoundsCheck)                                \
-  M(BoundType)                                  \
-  M(CheckCast)                                  \
-  M(ClassTableGet)                              \
-  M(Deoptimize)                                 \
-  M(InstanceOf)                                 \
-  M(InvokeInterface)                            \
-  M(InvokeUnresolved)                           \
-  M(MonitorOperation)                           \
-  M(NativeDebugInfo)                            \
-  M(PackedSwitch)                               \
-  M(Rem)                                        \
-  M(StaticFieldSet)                             \
-  M(UnresolvedInstanceFieldGet)                 \
-  M(UnresolvedInstanceFieldSet)                 \
-  M(UnresolvedStaticFieldGet)                   \
-  M(UnresolvedStaticFieldSet)                   \
+  M(ArmDexCacheArraysBase)                      \
 
 class CodeGeneratorARMVIXL;
 
+class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
+ public:
+  typedef vixl::aarch32::Literal<int32_t> IntLiteral;
+
+  explicit JumpTableARMVIXL(HPackedSwitch* switch_instr)
+      : switch_instr_(switch_instr),
+        table_start_(),
+        bb_addresses_(switch_instr->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+    uint32_t num_entries = switch_instr_->GetNumEntries();
+    for (uint32_t i = 0; i < num_entries; i++) {
+      IntLiteral *lit = new IntLiteral(0);  // Placeholder; presumably set by FixTable().
+      bb_addresses_.emplace_back(lit);  // The unique_ptr element takes ownership of `lit`.
+    }
+  }
+
+  vixl::aarch32::Label* GetTableStartLabel() { return &table_start_; }
+
+  void EmitTable(CodeGeneratorARMVIXL* codegen);
+  void FixTable(CodeGeneratorARMVIXL* codegen);
+
+ private:
+  HPackedSwitch* const switch_instr_;
+  vixl::aarch32::Label table_start_;
+  ArenaVector<std::unique_ptr<IntLiteral>> bb_addresses_;  // One literal per switch entry.
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTableARMVIXL);
+};
+
 class InvokeRuntimeCallingConventionARMVIXL
     : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> {
  public:
@@ -215,6 +248,38 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL);
 };
 
+class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionARMVIXL() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return helpers::LocationFrom(vixl::aarch32::r1);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return helpers::LocationFrom(vixl::aarch32::r0);
+  }
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::aarch32::r0, vixl::aarch32::r1)  // 64-bit: r0/r1 pair.
+        : helpers::LocationFrom(vixl::aarch32::r0);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::aarch32::r2, vixl::aarch32::r3)  // 64-bit: r2/r3 pair.
+        : (is_instance
+            ? helpers::LocationFrom(vixl::aarch32::r2)  // Non-64-bit value, instance field.
+            : helpers::LocationFrom(vixl::aarch32::r1));  // Non-64-bit value, static field.
+  }
+  Location GetFpuLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::aarch32::s0, vixl::aarch32::s1)  // 64-bit: s0/s1 pair.
+        : helpers::LocationFrom(vixl::aarch32::s0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARMVIXL);
+};
+
 class SlowPathCodeARMVIXL : public SlowPathCode {
  public:
   explicit SlowPathCodeARMVIXL(HInstruction* instruction)
@@ -304,7 +369,7 @@
   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR)
 
   ArmVIXLAssembler* GetAssembler() const { return assembler_; }
-  vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+  ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
  private:
   void VisitUnimplemementedInstruction(HInstruction* instruction) {
@@ -321,6 +386,7 @@
   void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+  void GenerateAddLongConst(Location out, Location first, uint64_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(HRor* ror);
@@ -344,6 +410,37 @@
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -357,7 +454,8 @@
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              vixl::aarch32::Label* true_target,
-                             vixl::aarch32::Label* false_target);
+                             vixl::aarch32::Label* false_target,
+                             bool far_target = true);
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     vixl::aarch32::Label* true_target,
                                     vixl::aarch32::Label* false_target);
@@ -410,7 +508,7 @@
 
   const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
-  vixl::aarch32::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+  ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
   size_t GetWordSize() const OVERRIDE { return kArmWordSize; }
 
@@ -422,10 +520,16 @@
     return block_entry_label->GetLocation();
   }
 
+  JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) {  // Table kept in jump_tables_.
+    jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr));
+    return jump_tables_.back().get();  // Raw pointer to the entry that was just appended.
+  }
+
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
 
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
 
+  void FixJumpTables();
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void Finalize(CodeAllocator* allocator) OVERRIDE;
   void SetupBlockedRegisters() const OVERRIDE;
@@ -438,6 +542,17 @@
   // Helper method to move a 32-bit value between two locations.
   void Move32(Location destination, Location source);
 
+  void LoadFromShiftedRegOffset(Primitive::Type type,
+                                Location out_loc,
+                                vixl::aarch32::Register base,
+                                vixl::aarch32::Register reg_index,
+                                vixl::aarch32::Condition cond = vixl::aarch32::al);
+  void StoreToShiftedRegOffset(Primitive::Type type,
+                               Location out_loc,
+                               vixl::aarch32::Register base,
+                               vixl::aarch32::Register reg_index,
+                               vixl::aarch32::Condition cond = vixl::aarch32::al);
+
   const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
 
   vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; }
@@ -462,11 +577,7 @@
     return 0;
   }
 
-  size_t RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
-                                      uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: RestoreFloatingPointRegister";
-    return 0;
-  }
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
     return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
@@ -502,6 +613,62 @@
                   vixl::aarch32::Register value,
                   bool can_be_null);
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::aarch32::Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).  This operation
+  // requires an extra temporary register, which must be provided as a
+  // non-null pointer (`temp2`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::aarch32::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false,
+                                                 vixl::aarch32::Register* temp2 = nullptr);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
   // If read barriers are enabled, generate a read barrier for a heap
   // reference using a slow path. If heap poisoning is enabled, also
   // unpoison the reference in `out`.
@@ -544,6 +711,7 @@
   ArenaDeque<vixl::aarch32::Label> block_labels_;  // Indexed by block id.
   vixl::aarch32::Label frame_entry_label_;
 
+  ArenaVector<std::unique_ptr<JumpTableARMVIXL>> jump_tables_;
   LocationsBuilderARMVIXL location_builder_;
   InstructionCodeGeneratorARMVIXL instruction_visitor_;
   ParallelMoveResolverARMVIXL move_resolver_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index f4a804f..61dabfa 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -224,7 +224,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
 
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
@@ -280,7 +280,7 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex();
+    const uint32_t string_index = load->GetStringIndex().index_;
     __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
     mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -378,7 +378,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -390,24 +389,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(locations->InAt(1),
+    codegen->EmitParallelMoves(locations->InAt(0),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class,
+                               locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
-
     if (instruction_->IsInstanceOf()) {
       mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      mips_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -563,8 +560,7 @@
     DCHECK_EQ(type, Primitive::kPrimFloat);  // Can only swap a float.
     FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
                                         : loc2.AsFpuRegister<FRegister>();
-    Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>()
-                                    : loc2.AsRegister<Register>();
+    Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>();
     __ Move(TMP, r2);
     __ Mfc1(r2, f1);
     __ Mtc1(TMP, f1);
@@ -605,10 +601,8 @@
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
   } else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
              (loc1.IsStackSlot() && loc2.IsRegister())) {
-    Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>()
-                                     : loc2.AsRegister<Register>();
-    intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex()
-                                         : loc2.GetStackIndex();
+    Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>();
+    intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex() : loc2.GetStackIndex();
     __ Move(TMP, reg);
     __ LoadFromOffset(kLoadWord, reg, SP, offset);
     __ StoreToOffset(kStoreWord, TMP, SP, offset);
@@ -618,8 +612,7 @@
                                            : loc2.AsRegisterPairLow<Register>();
     Register reg_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>()
                                            : loc2.AsRegisterPairHigh<Register>();
-    intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex()
-                                                 : loc2.GetStackIndex();
+    intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex() : loc2.GetStackIndex();
     intptr_t offset_h = loc1.IsDoubleStackSlot() ? loc1.GetHighStackIndex(kMipsWordSize)
                                                  : loc2.GetHighStackIndex(kMipsWordSize);
     __ Move(TMP, reg_l);
@@ -628,6 +621,20 @@
     __ Move(TMP, reg_h);
     __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h);
     __ StoreToOffset(kStoreWord, TMP, SP, offset_h);
+  } else if (loc1.IsFpuRegister() || loc2.IsFpuRegister()) {
+    FRegister reg = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
+                                         : loc2.AsFpuRegister<FRegister>();
+    intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex();
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, reg);
+      __ LoadSFromOffset(reg, SP, offset);
+      __ StoreSToOffset(FTMP, SP, offset);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, reg);
+      __ LoadDFromOffset(reg, SP, offset);
+      __ StoreDToOffset(FTMP, SP, offset);
+    }
   } else {
     LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported";
   }
@@ -1040,7 +1047,7 @@
     uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
     linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
                                                        target_string.dex_file,
-                                                       target_string.string_index));
+                                                       target_string.string_index.index_));
   }
   for (const auto& entry : boot_image_type_patches_) {
     const TypeReference& target_type = entry.first;
@@ -1049,7 +1056,7 @@
     uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
     linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
                                                      target_type.dex_file,
-                                                     target_type.type_index));
+                                                     target_type.type_index.index_));
   }
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
@@ -1066,8 +1073,8 @@
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
-    const DexFile& dex_file, uint32_t type_index) {
-  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+    const DexFile& dex_file, dex::TypeIndex type_index) {  // Forwards the raw index_ value.
+  return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
@@ -1103,14 +1110,14 @@
 }
 
 Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                              uint32_t string_index) {
+                                                              dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
 
 Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
-                                                            uint32_t type_index) {
+                                                            dex::TypeIndex type_index) {
   return boot_image_type_patches_.GetOrCreate(
       TypeReference(&dex_file, type_index),
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
@@ -4681,6 +4688,16 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented. Until then this visitor is a no-op (no locations are set up).
+}
+
+void InstructionCodeGeneratorMIPS::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented. Until then this visitor is a no-op (no code is emitted).
+}
+
 void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations);
@@ -5194,16 +5211,17 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      fallback_load = false;
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       fallback_load = false;
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      // TODO: implement.
+      fallback_load = true;
+      break;
   }
   if (fallback_load) {
     desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
@@ -5531,7 +5549,7 @@
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -5625,7 +5643,7 @@
                         base_or_current_method_reg,
                         ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
       // /* GcRoot<mirror::Class> */ out = out[type_index]
-      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
       GenerateGcRootFieldLoad(cls, out_loc, out, offset);
       generate_null_check = !cls->IsInDexCache();
     }
@@ -5695,7 +5713,12 @@
     default:
       break;
   }
-  locations->SetOut(Location::RequiresRegister());
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+  } else {
+    locations->SetOut(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
@@ -5730,7 +5753,7 @@
       DCHECK(!kEmitCompilerReadBarrier);
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
       return;  // No dex cache slow path.
     }
@@ -5746,7 +5769,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
       __ LoadFromOffset(kLoadWord, out, out, 0);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
@@ -5762,7 +5785,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
-  __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -5962,7 +5985,7 @@
   Register current_method_register = calling_convention.GetRegisterAt(2);
   __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
   // Move an uint16_t value to a register.
-  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex().index_);
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
                        void*, uint32_t, int32_t, ArtMethod*>();
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index e132819..2273e52 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
 
 #include "code_generator.h"
+#include "dex_file_types.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -449,11 +450,12 @@
   };
 
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
-  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
-  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                             dex::StringIndex string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
   void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 010bf24..b1f9b1d 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -180,7 +180,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex().index_);
     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                 : kQuickInitializeType;
     mips64_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
@@ -234,7 +234,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
     mips64_codegen->InvokeRuntime(kQuickResolveString,
                                   instruction_,
@@ -322,7 +322,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
+
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -334,24 +334,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(locations->InAt(1),
+    codegen->EmitParallelMoves(locations->InAt(0),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class,
+                               locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
-
     if (instruction_->IsInstanceOf()) {
       mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      mips64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -2638,6 +2636,16 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented. Until then this visitor is a no-op (no locations are set up).
+}
+
+void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented. Until then this visitor is a no-op (no code is emitted).
+}
+
 void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -3159,7 +3167,7 @@
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -3176,7 +3184,7 @@
     __ LoadFromOffset(kLoadDoubleword, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
     __ LoadFromOffset(
-        kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+        kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_));
     // TODO: We will need a read barrier here.
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -3384,7 +3392,8 @@
 void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   // Move an uint16_t value to a register.
-  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
+  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(),
+                 instruction->GetTypeIndex().index_);
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index efd33c7..d6e92cc 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -151,7 +151,7 @@
       }
       __ movl(length_loc.AsRegister<Register>(), array_len);
       if (mirror::kUseStringCompression) {
-        __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX));
+        __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
       }
     }
     x86_codegen->EmitParallelMoves(
@@ -225,7 +225,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index));
     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -265,7 +265,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex()));
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex().index_));
     x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
                                           : kQuickInitializeType,
                                at_, dex_pc_, this);
@@ -312,8 +312,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -327,25 +325,25 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    x86_codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    x86_codegen->EmitParallelMoves(locations->InAt(0),
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                   Primitive::kPrimNot,
+                                   locations->InAt(1),
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                                   Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x86_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -1014,6 +1012,7 @@
       simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_address_offset_(-1) {
@@ -1060,6 +1059,11 @@
     }
   }
 
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ movl(Address(ESP, -kShouldDeoptimizeFlagSize), Immediate(0));
+  }
+
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ subl(ESP, Immediate(adjust));
   __ cfi().AdjustCFAOffset(adjust);
@@ -1677,6 +1681,17 @@
                                /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
+          Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
 static bool SelectCanUseCMOV(HSelect* select) {
   // There are no conditional move instructions for XMMs.
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -4169,7 +4184,7 @@
 
 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
+  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex().index_));
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
@@ -4608,18 +4623,18 @@
 
 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   __ Bind(&string_patches_.back().label);
 }
 
 void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) {
-  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
   __ Bind(&type_patches_.back().label);
 }
 
 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   return &string_patches_.back().label;
 }
 
@@ -5237,9 +5252,11 @@
         // Branch cases into compressed and uncompressed for each index's type.
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         NearLabel done, not_compressed;
-        __ cmpl(Address(obj, count_offset), Immediate(0));
+        __ testl(Address(obj, count_offset), Immediate(1));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ j(kGreaterEqual, &not_compressed);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ j(kNotZero, &not_compressed);
         __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
         __ jmp(&done);
         __ Bind(&not_compressed);
@@ -5589,7 +5606,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out most significant bit in case the array is String's array of char.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ andl(out, Immediate(INT32_MAX));
+    __ shrl(out, Immediate(1));
   }
 }
 
@@ -5648,10 +5665,12 @@
       Location array_loc = array_length->GetLocations()->InAt(0);
       Address array_len(array_loc.AsRegister<Register>(), len_offset);
       if (is_string_compressed_char_at) {
+        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
+        // the string compression flag) with the in-memory length and avoid the temporary.
         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
         __ movl(length_reg, array_len);
         codegen_->MaybeRecordImplicitNullCheck(array_length);
-        __ andl(length_reg, Immediate(INT32_MAX));
+        __ shrl(length_reg, Immediate(1));
         codegen_->GenerateIntCompare(length_reg, index_loc);
       } else {
         // Checking bounds for general case:
@@ -6057,7 +6076,7 @@
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -6067,7 +6086,9 @@
   Register out = out_loc.AsRegister<Register>();
 
   bool generate_null_check = false;
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
@@ -6079,24 +6100,24 @@
           out_loc,
           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
           /* fixup_label */ nullptr,
-          requires_read_barrier);
+          read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ movl(out, Immediate(/* placeholder */ 0));
       codegen_->RecordTypePatch(cls);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       Register method_address = locations->InAt(0).AsRegister<Register>();
       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
       codegen_->RecordTypePatch(cls);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ movl(out, Immediate(address));
@@ -6111,7 +6132,7 @@
                               out_loc,
                               Address::Absolute(address),
                               /* fixup_label */ nullptr,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -6124,7 +6145,7 @@
                               out_loc,
                               Address(base_reg, CodeGeneratorX86::kDummy32BitOffset),
                               fixup_label,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -6137,9 +6158,10 @@
       // /* GcRoot<mirror::Class> */ out = out[type_index]
       GenerateGcRootFieldLoad(cls,
                               out_loc,
-                              Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
+                              Address(out,
+                                      CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_)),
                               /* fixup_label */ nullptr,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -6212,21 +6234,17 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
   }
   return desired_string_load_kind;
 }
 
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
-      ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
-          ? LocationSummary::kCallOnMainOnly
-          : LocationSummary::kCallOnSlowPath)
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
@@ -6251,6 +6269,15 @@
   }
 }
 
+Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
+                                               dex::StringIndex dex_index) {
+  jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u);
+  // Add a patch entry and return the label.
+  jit_string_patches_.emplace_back(dex_file, dex_index.index_);
+  PatchInfo<Label>* info = &jit_string_patches_.back();
+  return &info->label;
+}
+
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
@@ -6279,8 +6306,8 @@
       Register method_address = locations->InAt(0).AsRegister<Register>();
       Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
-      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
+      // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
       codegen_->AddSlowPath(slow_path);
       __ testl(out, out);
@@ -6288,6 +6315,14 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
+    case HLoadString::LoadKind::kJitTableAddress: {
+      Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
+      Label* fixup_label = codegen_->NewJitRootStringPatch(
+          load->GetDexFile(), load->GetStringIndex());
+      // /* GcRoot<mirror::String> */ out = *address
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+      return;
+    }
     default:
       break;
   }
@@ -6295,7 +6330,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
+  __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -6334,12 +6369,26 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
       !kUseBakerReadBarrier &&
       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+    return 2;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
 }
 
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6370,11 +6419,8 @@
   locations->SetInAt(1, Location::Any());
   // Note that TypeCheckSlowPathX86 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // When read barriers are enabled, we need a temporary register for some cases.
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6385,9 +6431,9 @@
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6402,11 +6448,14 @@
     __ j(kEqual, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
       } else {
@@ -6422,12 +6471,22 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -6446,6 +6505,12 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
       __ Bind(&loop);
@@ -6457,7 +6522,11 @@
       }
       __ j(kEqual, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -6471,6 +6540,12 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       NearLabel exact_check;
       if (cls.IsRegister()) {
@@ -6482,7 +6557,11 @@
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -6495,6 +6574,13 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
       } else {
@@ -6559,35 +6645,43 @@
   }
 }
 
+static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
+  switch (type_check_kind) {
+  case TypeCheckKind::kExactCheck:
+  case TypeCheckKind::kAbstractClassCheck:
+  case TypeCheckKind::kClassHierarchyCheck:
+  case TypeCheckKind::kArrayObjectCheck:
+    return !throws_into_catch && !kEmitCompilerReadBarrier;
+  case TypeCheckKind::kInterfaceCheck:
+    return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
+  case TypeCheckKind::kArrayCheck:
+  case TypeCheckKind::kUnresolvedCheck:
+    return false;
+  }
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
-  switch (type_check_kind) {
-    case TypeCheckKind::kExactCheck:
-    case TypeCheckKind::kAbstractClassCheck:
-    case TypeCheckKind::kClassHierarchyCheck:
-    case TypeCheckKind::kArrayObjectCheck:
-      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
-      break;
-    case TypeCheckKind::kArrayCheck:
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCallOnSlowPath;
-      break;
-  }
+  LocationSummary::CallKind call_kind =
+      IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch)
+          ? LocationSummary::kNoCall
+          : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    // Require a register for the interface check since there is a loop that compares the class to
+    // a memory address.
+    locations->SetInAt(1, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // When read barriers are enabled, we need an additional temporary register for some cases.
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
@@ -6598,20 +6692,25 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_GE(num_temps, 1u);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
   bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+      IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
+
   SlowPathCode* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
                                                         is_type_check_slow_path_fatal);
@@ -6624,12 +6723,16 @@
     __ j(kEqual, &done);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6643,28 +6746,30 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, compare_classes;
+      NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
       __ testl(temp, temp);
-      __ j(kNotEqual, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6676,6 +6781,13 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -6688,26 +6800,30 @@
       __ j(kEqual, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ testl(temp, temp);
-      __ j(kNotEqual, &loop);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      __ j(kNotZero, &loop);
+      // Otherwise, jump to the slow path to throw the exception.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
-      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6718,38 +6834,24 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
+      // If the component type is null (i.e. the object is not an array), jump to the slow path to
+      // throw the exception. Otherwise proceed with the check.
       __ testl(temp, temp);
-      __ j(kNotEqual, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kEqual, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
-      //
+      // We always go into the type check slow path for the unresolved check case.
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
       // calling InvokeRuntime directly), as it would require to
@@ -6757,15 +6859,50 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Fast path for the interface check. Since we compare with a memory location in the inner
+      // loop we would need to have cls poisoned. However unpoisoning cls would reset the
+      // conditional flags and cause the conditional jump to be incorrect. Therefore we just jump
+      // to the slow path if we are running under poisoning.
+      if (!kPoisonHeapReferences) {
+        // Try to avoid read barriers to improve the fast path. We can not get false positives by
+        // doing this.
+        // /* HeapReference<Class> */ temp = obj->klass_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          obj_loc,
+                                          class_offset,
+                                          kWithoutReadBarrier);
+
+        // /* HeapReference<Class> */ temp = temp->iftable_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          temp_loc,
+                                          iftable_offset,
+                                          kWithoutReadBarrier);
+        // Iftable is never null.
+        __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
+        // Loop through the iftable and check if any class matches.
+        NearLabel start_loop;
+        __ Bind(&start_loop);
+        // Need to subtract first to handle the empty array case.
+        __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
+        __ j(kNegative, type_check_slow_path->GetEntryLabel());
+        // Go to next interface if the classes do not match.
+        __ cmpl(cls.AsRegister<Register>(),
+                CodeGeneratorX86::ArrayAddress(temp,
+                                               maybe_temp2_loc,
+                                               TIMES_4,
+                                               object_array_data_offset));
+        __ j(kNotEqual, &start_loop);
+      } else {
+        __ jmp(type_check_slow_path->GetEntryLabel());
+      }
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -6924,12 +7061,15 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                   Location out,
-                                                                   uint32_t offset,
-                                                                   Location maybe_temp) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
@@ -6954,13 +7094,16 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                    Location out,
-                                                                    Location obj,
-                                                                    uint32_t offset) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6980,13 +7123,14 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
-                                                          Location root,
-                                                          const Address& address,
-                                                          Label* fixup_label,
-                                                          bool requires_read_barrier) {
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
+    HInstruction* instruction,
+    Location root,
+    const Address& address,
+    Label* fixup_label,
+    ReadBarrierOption read_barrier_option) {
   Register root_reg = root.AsRegister<Register>();
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
@@ -7093,7 +7237,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -7111,14 +7255,13 @@
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -7453,7 +7596,7 @@
     // The value to patch is the distance from the offset in the constant area
     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
-    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();;
+    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();
 
     // Patch in the right value.
     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
@@ -7627,6 +7770,21 @@
   }
 }
 
+void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+  for (const PatchInfo<Label>& info : jit_string_patches_) {  // One patch per recorded label.
+    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file,
+                                                            dex::StringIndex(info.index)));
+    DCHECK(it != jit_string_roots_.end());  // Each patch must have a matching root entry.
+    size_t index_in_table = it->second;  // Slot index of this string within roots_data.
+    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+    typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;  // Unaligned store.
+    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
+       dchecked_integral_cast<uint32_t>(address);  // Write the slot's 32-bit address into code.
+  }
+}
+
 #undef __
 
 }  // namespace x86
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1b51999..2ae3670 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -20,6 +20,7 @@
 #include "arch/x86/instruction_set_features_x86.h"
 #include "base/enums.h"
 #include "code_generator.h"
+#include "dex_file_types.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -240,7 +241,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -254,17 +256,18 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *address
   //
-  // while honoring read barriers if `requires_read_barrier` is true.
+  // while honoring read barriers based on read_barrier_option.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                const Address& address,
                                Label* fixup_label,
-                               bool requires_read_barrier);
+                               ReadBarrierOption read_barrier_option);
 
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
@@ -412,12 +415,15 @@
   void RecordTypePatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+  Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // Emit linker patches.
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+
   // Emit a write barrier.
   void MarkGCCard(Register temp,
                   Register card,
@@ -614,6 +620,9 @@
   // Type patch locations.
   ArenaDeque<PatchInfo<Label>> type_patches_;
 
+  // Patches for string root accesses in JIT compiled code.
+  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
+
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
   int32_t constant_area_start_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index fcabeea..4474dec 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -199,7 +199,7 @@
       }
       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
       if (mirror::kUseStringCompression) {
-        __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX));
+        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
       }
     }
 
@@ -246,7 +246,8 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
+    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
+            Immediate(cls_->GetTypeIndex().index_));
     x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
                                   at_,
                                   dex_pc_,
@@ -299,7 +300,7 @@
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     // Custom calling convention: RAX serves as both input and output.
     __ movl(CpuRegister(RAX), Immediate(string_index));
     x86_64_codegen->InvokeRuntime(kQuickResolveString,
@@ -332,8 +333,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -348,22 +347,19 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x86_64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -1110,18 +1106,18 @@
 
 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   __ Bind(&string_patches_.back().label);
 }
 
 void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
-  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
   __ Bind(&type_patches_.back().label);
 }
 
 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   return &string_patches_.back().label;
 }
 
@@ -1263,7 +1259,8 @@
         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -1329,6 +1326,12 @@
     }
   }
 
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize),
+            Immediate(0));
+  }
+
   // Save the current method if we need it. Note that we do not
   // do this in HCurrentMethod, as the instruction might have been removed
   // in the SSA graph.
@@ -1750,6 +1753,17 @@
                                /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);  // Summary is created as kNoCall.
+  locations->SetOut(Location::RequiresRegister());  // Result is produced in a register.
+}
+
+void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),  // out = flag on the stack.
+          Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
 static bool SelectCanUseCMOV(HSelect* select) {
   // There are no conditional move instructions for XMMs.
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -4102,7 +4116,7 @@
 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
-                           instruction->GetTypeIndex());
+                           instruction->GetTypeIndex().index_);
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
@@ -4729,9 +4743,11 @@
         // Branch cases into compressed and uncompressed for each index's type.
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         NearLabel done, not_compressed;
-        __ cmpl(Address(obj, count_offset), Immediate(0));
+        __ testl(Address(obj, count_offset), Immediate(1));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ j(kGreaterEqual, &not_compressed);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ j(kNotZero, &not_compressed);
         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
         __ jmp(&done);
         __ Bind(&not_compressed);
@@ -5063,7 +5079,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out most significant bit in case the array is String's array of char.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ andl(out, Immediate(INT32_MAX));
+    __ shrl(out, Immediate(1));
   }
 }
 
@@ -5115,10 +5131,12 @@
       Location array_loc = array_length->GetLocations()->InAt(0);
       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
+        // the string compression flag) with the in-memory length and avoid the temporary.
         CpuRegister length_reg = CpuRegister(TMP);
         __ movl(length_reg, array_len);
         codegen_->MaybeRecordImplicitNullCheck(array_length);
-        __ andl(length_reg, Immediate(INT32_MAX));
+        __ shrl(length_reg, Immediate(1));
         codegen_->GenerateIntCompare(length_reg, index_loc);
       } else {
         // Checking the bound for general case:
@@ -5485,7 +5503,7 @@
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
-    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
     codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
@@ -5494,7 +5512,9 @@
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
 
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5507,16 +5527,16 @@
           out_loc,
           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
           /* fixup_label */ nullptr,
-          requires_read_barrier);
+          read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
       codegen_->RecordTypePatch(cls);
       break;
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ movl(out, Immediate(address));  // Zero-extended.
@@ -5532,7 +5552,7 @@
                                 out_loc,
                                 address,
                                 /* fixup_label */ nullptr,
-                                requires_read_barrier);
+                                read_barrier_option);
       } else {
         // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
         __ movq(out, Immediate(cls->GetAddress()));
@@ -5540,7 +5560,7 @@
                                 out_loc,
                                 Address(out, 0),
                                 /* fixup_label */ nullptr,
-                                requires_read_barrier);
+                                read_barrier_option);
       }
       generate_null_check = !cls->IsInDexCache();
       break;
@@ -5551,7 +5571,7 @@
       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                           /* no_rip */ false);
       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5566,9 +5586,9 @@
       GenerateGcRootFieldLoad(
           cls,
           out_loc,
-          Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
+          Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_)),
           /* fixup_label */ nullptr,
-          requires_read_barrier);
+          read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5624,24 +5644,20 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
   }
   return desired_string_load_kind;
 }
 
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
-      ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
-          ? LocationSummary::kCallOnMainOnly
-          : LocationSummary::kCallOnSlowPath)
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
     locations->SetOut(Location::RegisterLocation(RAX));
@@ -5661,6 +5677,15 @@
   }
 }
 
+Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
+                                                  dex::StringIndex dex_index) {
+  jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u);
+  // Add a patch entry for this string and return its label (used as the load's fixup label).
+  jit_string_patches_.emplace_back(dex_file, dex_index.index_);
+  PatchInfo<Label>* info = &jit_string_patches_.back();  // The entry that was just appended.
+  return &info->label;
+}
+
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
@@ -5684,7 +5709,7 @@
                                           /* no_rip */ false);
       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
       codegen_->AddSlowPath(slow_path);
       __ testl(out, out);
@@ -5692,13 +5717,22 @@
       __ Bind(slow_path->GetExitLabel());
       return;
     }
+    case HLoadString::LoadKind::kJitTableAddress: {
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ true);
+      Label* fixup_label =
+          codegen_->NewJitRootStringPatch(load->GetDexFile(), load->GetStringIndex());
+      // /* GcRoot<mirror::String> */ out = *address
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+      return;
+    }
     default:
       break;
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   // Custom calling convention: RAX serves as both input and output.
-  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex()));
+  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
   codegen_->InvokeRuntime(kQuickResolveString,
                           load,
                           load->GetDexPc());
@@ -5740,7 +5774,19 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+    // We need a temporary for holding the iftable length.
+    return true;
+  }
+  return kEmitCompilerReadBarrier &&  // Otherwise a temporary is only needed when read
+      !kUseBakerReadBarrier &&  // barriers are emitted and are not Baker-style,
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||  // and only for these
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||  // three check kinds.
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
+static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
       !kUseBakerReadBarrier &&
       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5778,7 +5824,7 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
+  if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5791,7 +5837,7 @@
   Location cls = locations->InAt(1);
   Location out_loc =  locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(0) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5808,11 +5854,14 @@
     __ j(kEqual, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
       } else {
@@ -5833,12 +5882,22 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5857,6 +5916,12 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
       __ Bind(&loop);
@@ -5868,7 +5933,11 @@
       }
       __ j(kEqual, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5882,6 +5951,12 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       NearLabel exact_check;
       if (cls.IsRegister()) {
@@ -5893,7 +5968,11 @@
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5906,6 +5985,13 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
       } else {
@@ -5970,33 +6056,45 @@
   }
 }
 
-void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
-      break;
+      return !throws_into_catch && !kEmitCompilerReadBarrier;
+    case TypeCheckKind::kInterfaceCheck:
+      return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCallOnSlowPath;
-      break;
+      return false;
   }
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch);
+  LocationSummary::CallKind call_kind = is_fatal_slow_path
+                                            ? LocationSummary::kNoCall
+                                            : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    // Require a register for the interface check since there is a loop that compares the class to
+    // a memory address.
+    locations->SetInAt(1, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
+
   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
+  if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -6009,38 +6107,45 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(1) :
       Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
   bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+      IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
   SlowPathCode* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
+
+  NearLabel done;
+  // Avoid null check if we know obj is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ testl(obj, obj);
+    __ j(kEqual, &done);
+  }
+
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -6050,43 +6155,32 @@
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, compare_classes;
+      NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
       __ testl(temp, temp);
-      __ j(kNotEqual, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -6094,21 +6188,16 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kNotEqual, &loop);
-      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -6121,39 +6210,28 @@
       __ j(kEqual, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ testl(temp, temp);
-      __ j(kNotEqual, &loop);
+      __ j(kNotZero, &loop);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
-      // We cannot use a NearLabel here, as its range might be too
-      // short in some cases when read barriers are enabled.  This has
-      // been observed for instance when the code emitted for this
-      // case uses high x86-64 registers (R8-R15).
-      Label done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
@@ -6166,48 +6244,26 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
       // to further check that this component type is not a primitive
       // type.
       __ testl(temp, temp);
-      __ j(kNotEqual, &check_non_primitive_component_type);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kEqual, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
 
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
+    case TypeCheckKind::kUnresolvedCheck: {
+      // We always go into the type check slow path for the unresolved case.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6216,16 +6272,52 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
       break;
+    }
+
+    case TypeCheckKind::kInterfaceCheck:
+      // Fast path for the interface check. With heap poisoning we always take the slow path,
+      // since unpoisoning `cls` would require an extra temp.
+      if (!kPoisonHeapReferences) {
+        // Try to avoid read barriers to improve the fast path. We cannot get false positives by
+        // doing this.
+        // /* HeapReference<Class> */ temp = obj->klass_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          obj_loc,
+                                          class_offset,
+                                          kWithoutReadBarrier);
+
+        // /* HeapReference<Class> */ temp = temp->iftable_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          temp_loc,
+                                          iftable_offset,
+                                          kWithoutReadBarrier);
+        // Iftable is never null.
+        __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
+        // Loop through the iftable and check if any class matches.
+        NearLabel start_loop;
+        __ Bind(&start_loop);
+        // Need to subtract first to handle the empty array case.
+        __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
+        __ j(kNegative, type_check_slow_path->GetEntryLabel());
+        // Go to next interface if the classes do not match.
+        __ cmpl(cls.AsRegister<CpuRegister>(),
+                CodeGeneratorX86_64::ArrayAddress(temp,
+                                                  maybe_temp2_loc,
+                                                  TIMES_4,
+                                                  object_array_data_offset));
+        __ j(kNotEqual, &start_loop);  // Return if same class.
+      } else {
+        __ jmp(type_check_slow_path->GetEntryLabel());
+      }
+      break;
+  }
+
+  if (done.IsLinked()) {
+    __ Bind(&done);
   }
 
   __ Bind(type_check_slow_path->GetExitLabel());
@@ -6364,12 +6456,15 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                      Location out,
-                                                                      uint32_t offset,
-                                                                      Location maybe_temp) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
@@ -6394,13 +6489,16 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                       Location out,
-                                                                       Location obj,
-                                                                       uint32_t offset) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6420,13 +6518,14 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
-                                                             Location root,
-                                                             const Address& address,
-                                                             Label* fixup_label,
-                                                             bool requires_read_barrier) {
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
+    HInstruction* instruction,
+    Location root,
+    const Address& address,
+    Label* fixup_label,
+    ReadBarrierOption read_barrier_option) {
   CpuRegister root_reg = root.AsRegister<CpuRegister>();
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
@@ -6535,7 +6634,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6553,14 +6652,13 @@
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -7029,6 +7127,21 @@
   }
 }
 
+void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+  for (const PatchInfo<Label>& info : jit_string_patches_) {  // Each recorded string patch.
+    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file,
+                                                            dex::StringIndex(info.index)));
+    DCHECK(it != jit_string_roots_.end());  // The string must have a root table entry.
+    size_t index_in_table = it->second;  // Slot index within `roots_data`.
+    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+    typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;  // May be misaligned.
+    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
+       dchecked_integral_cast<uint32_t>(address);  // Patch in the 32-bit root slot address.
+  }
+}
+
 #undef __
 
 }  // namespace x86_64
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 8b19dad..2f41f73 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -234,7 +234,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -248,17 +249,18 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *address
   //
-  // while honoring read barriers if `requires_read_barrier` is true.
+  // while honoring read barriers as specified by `read_barrier_option`.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                const Address& address,
                                Label* fixup_label,
-                               bool requires_read_barrier);
+                               ReadBarrierOption read_barrier_option);
 
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
@@ -410,11 +412,14 @@
   void RecordTypePatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+  Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+
   const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
     return isa_features_;
   }
@@ -600,6 +605,9 @@
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
 
+  // Patches for string literals in JIT compiled code.
+  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 9ec32df..ac83bd9 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -259,7 +259,7 @@
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   if (!graph_checker.IsValid()) {
-    for (auto error : graph_checker.GetErrors()) {
+    for (const auto& error : graph_checker.GetErrors()) {
       std::cout << error << std::endl;
     }
   }
@@ -269,7 +269,7 @@
 template <typename Expected>
 static void RunCodeNoCheck(CodeGenerator* codegen,
                            HGraph* graph,
-                           std::function<void(HGraph*)> hook_before_codegen,
+                           const std::function<void(HGraph*)>& hook_before_codegen,
                            bool has_result,
                            Expected expected) {
   SsaLivenessAnalysis liveness(graph, codegen);
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 5d92bfd..d3623f1 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -37,29 +37,24 @@
   return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode()));
 }
 
-inline vixl::aarch32::DRegister FromLowSToD(vixl::aarch32::SRegister reg) {
-  DCHECK_EQ(reg.GetCode() % 2, 0u) << reg;
-  return vixl::aarch32::DRegister(reg.GetCode() / 2);
-}
-
 inline vixl::aarch32::Register HighRegisterFrom(Location location) {
   DCHECK(location.IsRegisterPair()) << location;
-  return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl32::Register>());
+  return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl::aarch32::Register>());
 }
 
 inline vixl::aarch32::DRegister HighDRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegisterPair()) << location;
-  return vixl::aarch32::DRegister(location.AsFpuRegisterPairHigh<vixl32::DRegister>());
+  return vixl::aarch32::DRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::DRegister>());
 }
 
 inline vixl::aarch32::Register LowRegisterFrom(Location location) {
   DCHECK(location.IsRegisterPair()) << location;
-  return vixl::aarch32::Register(location.AsRegisterPairLow<vixl32::Register>());
+  return vixl::aarch32::Register(location.AsRegisterPairLow<vixl::aarch32::Register>());
 }
 
 inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegisterPair()) << location;
-  return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl32::SRegister>());
+  return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
 }
 
 inline vixl::aarch32::Register RegisterFrom(Location location) {
@@ -135,6 +130,26 @@
                       instr->InputAt(input_index)->GetType());
 }
 
+inline vixl::aarch32::Register InputRegister(HInstruction* instr) {
+  DCHECK_EQ(instr->InputCount(), 1u);
+  return InputRegisterAt(instr, 0);
+}
+
+inline int32_t Int32ConstantFrom(Location location) {
+  HConstant* constant = location.GetConstant();
+  if (constant->IsIntConstant()) {
+    return constant->AsIntConstant()->GetValue();
+  }
+  if (constant->IsNullConstant()) {
+    return 0;  // A null constant reads as zero.
+  }
+  DCHECK(constant->IsLongConstant()) << constant->DebugName();
+  const int64_t wide = constant->AsLongConstant()->GetValue();
+  DCHECK_GE(wide, std::numeric_limits<int32_t>::min());  // Must fit in 32 bits.
+  DCHECK_LE(wide, std::numeric_limits<int32_t>::max());
+  return static_cast<int32_t>(wide);
+}
+
 inline int64_t Int64ConstantFrom(Location location) {
   HConstant* instr = location.GetConstant();
   if (instr->IsIntConstant()) {
@@ -151,7 +166,7 @@
   if (location.IsRegister()) {
     return vixl::aarch32::Operand(RegisterFrom(location, type));
   } else {
-    return vixl::aarch32::Operand(Int64ConstantFrom(location));
+    return vixl::aarch32::Operand(Int32ConstantFrom(location));
   }
 }
 
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index e10b1d6..05c6df4 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -39,8 +39,7 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
-      : HOptimization(graph, name) {}
+  HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index d1a2a26..7ef28ed 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -42,7 +42,7 @@
                 const std::string& expected_before,
                 const std::string& expected_after_cf,
                 const std::string& expected_after_dce,
-                std::function<void(HGraph*)> check_after_cf,
+                const std::function<void(HGraph*)>& check_after_cf,
                 Primitive::Type return_type = Primitive::kPrimInt) {
     graph_ = CreateCFG(&allocator_, data, return_type);
     TestCodeOnReadyGraph(expected_before,
@@ -54,7 +54,7 @@
   void TestCodeOnReadyGraph(const std::string& expected_before,
                             const std::string& expected_after_cf,
                             const std::string& expected_after_dce,
-                            std::function<void(HGraph*)> check_after_cf) {
+                            const std::function<void(HGraph*)>& check_after_cf) {
     ASSERT_NE(graph_, nullptr);
 
     StringPrettyPrinter printer_before(graph_);
@@ -65,7 +65,7 @@
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
-    HConstantFolding(graph_).Run();
+    HConstantFolding(graph_, "constant_folding").Run();
     GraphChecker graph_checker_cf(graph_);
     graph_checker_cf.Run();
     ASSERT_TRUE(graph_checker_cf.IsValid());
@@ -77,7 +77,7 @@
 
     check_after_cf(graph_);
 
-    HDeadCodeElimination(graph_).Run();
+    HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run();
     GraphChecker graph_checker_dce(graph_);
     graph_checker_dce.Run();
     ASSERT_TRUE(graph_checker_dce.IsValid());
@@ -756,7 +756,7 @@
 
   // Make various unsigned comparisons with zero against a parameter.
   HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt, true);
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt, true);
   entry_block->AddInstruction(parameter);
   entry_block->AddInstruction(new (&allocator_) HGoto());
 
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 9de521a..c31c66a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -161,8 +161,21 @@
 //        |      |      |
 //       B4      B5    B?
 //
-// This simplification cannot be applied for loop headers, as they
-// contain a suspend check.
+// Note that individual edges can be redirected (for example B2->B3
+// can be redirected as B2->B5) without applying this optimization
+// to other incoming edges.
+//
+// This simplification cannot be applied to catch blocks, because
+// exception handler edges do not represent normal control flow.
+// Though in theory this could still apply to normal control flow
+// going directly to a catch block, we cannot support it at the
+// moment because the catch Phi's inputs do not correspond to the
+// catch block's predecessors, so we cannot identify which
+// predecessor corresponds to a given statically evaluated input.
+//
+// We do not apply this optimization to loop headers as this could
+// create irreducible loops. We rely on the suspend check in the
+// loop header to prevent the pattern match.
 //
 // Note that we rely on the dead code elimination to get rid of B3.
 bool HDeadCodeElimination::SimplifyIfs() {
@@ -172,7 +185,8 @@
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     HInstruction* last = block->GetLastInstruction();
     HInstruction* first = block->GetFirstInstruction();
-    if (last->IsIf() &&
+    if (!block->IsCatchBlock() &&
+        last->IsIf() &&
         block->HasSinglePhi() &&
         block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
       bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 58e700d..84fd890 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -29,9 +29,7 @@
  */
 class HDeadCodeElimination : public HOptimization {
  public:
-  HDeadCodeElimination(HGraph* graph,
-                       OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kDeadCodeEliminationPassName)
+  HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name)
       : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index fe52aac..fdd77e7 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -44,7 +44,7 @@
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
-  HDeadCodeElimination(graph).Run();
+  HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run();
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc
new file mode 100644
index 0000000..9dc53e6
--- /dev/null
+++ b/compiler/optimizing/emit_swap_mips_test.cc
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "code_generator_mips.h"
+#include "optimizing_unit_test.h"
+#include "parallel_move_resolver.h"
+#include "utils/assembler_test_base.h"
+#include "utils/mips/assembler_mips.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class EmitSwapMipsTest : public ::testing::Test {
+ public:
+  void SetUp() OVERRIDE {
+    allocator_.reset(new ArenaAllocator(&pool_));
+    graph_ = CreateGraph(allocator_.get());
+    isa_features_ = MipsInstructionSetFeatures::FromCppDefines();
+    codegen_ = new (graph_->GetArena()) mips::CodeGeneratorMIPS(graph_,
+                                                                *isa_features_.get(),
+                                                                CompilerOptions());
+    moves_ = new (allocator_.get()) HParallelMove(allocator_.get());
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
+  }
+
+  void TearDown() OVERRIDE {
+    allocator_.reset();
+    test_helper_.reset();
+  }
+
+  // Get the typically used name for this architecture.
+  std::string GetArchitectureString() {
+    return "mips";
+  }
+
+  // Get the name of the assembler.
+  std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches to the assembler command.
+  std::string GetAssemblerParameters() {
+    return " --no-warn -32 -march=mips32r2";
+  }
+
+  // Get the name of the objdump.
+  std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the objdump command.
+  std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Get the name of the disassembler (objdump is reused for this).
+  std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the disassembler command.
+  std::string GetDisassembleParameters() {
+    return " -D -bbinary -mmips:isa32r2";
+  }
+
+  // No need for assembly header here.
+  const char* GetAssemblyHeader() {
+    return nullptr;
+  }
+
+  void DriverWrapper(HParallelMove* move, std::string assembly_text, std::string test_name) {
+    codegen_->GetMoveResolver()->EmitNativeCode(move);
+    assembler_ = codegen_->GetAssembler();
+    assembler_->FinalizeCode();
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(assembler_->CodeSize()));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+    test_helper_->Driver(*data, assembly_text, test_name);
+  }
+
+ protected:
+  ArenaPool pool_;
+  HGraph* graph_;
+  HParallelMove* moves_;
+  mips::CodeGeneratorMIPS* codegen_;
+  mips::MipsAssembler* assembler_;
+  std::unique_ptr<ArenaAllocator> allocator_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+  std::unique_ptr<const MipsInstructionSetFeatures> isa_features_;
+};
+
+TEST_F(EmitSwapMipsTest, TwoRegisters) {
+  moves_->AddMove(
+      Location::RegisterLocation(4),
+      Location::RegisterLocation(5),
+      Primitive::kPrimInt,
+      nullptr);
+  moves_->AddMove(
+      Location::RegisterLocation(5),
+      Location::RegisterLocation(4),
+      Primitive::kPrimInt,
+      nullptr);
+  const char* expected =
+      "or $t8, $a1, $zero\n"
+      "or $a1, $a0, $zero\n"
+      "or $a0, $t8, $zero\n";
+  DriverWrapper(moves_, expected, "TwoRegisters");
+}
+
+TEST_F(EmitSwapMipsTest, TwoRegisterPairs) {
+  moves_->AddMove(
+      Location::RegisterPairLocation(4, 5),
+      Location::RegisterPairLocation(6, 7),
+      Primitive::kPrimLong,
+      nullptr);
+  moves_->AddMove(
+      Location::RegisterPairLocation(6, 7),
+      Location::RegisterPairLocation(4, 5),
+      Primitive::kPrimLong,
+      nullptr);
+  const char* expected =
+      "or $t8, $a2, $zero\n"
+      "or $a2, $a0, $zero\n"
+      "or $a0, $t8, $zero\n"
+      "or $t8, $a3, $zero\n"
+      "or $a3, $a1, $zero\n"
+      "or $a1, $t8, $zero\n";
+  DriverWrapper(moves_, expected, "TwoRegisterPairs");
+}
+
+TEST_F(EmitSwapMipsTest, TwoFpuRegistersFloat) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::FpuRegisterLocation(6),
+      Primitive::kPrimFloat,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(6),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimFloat,
+      nullptr);
+  const char* expected =
+      "mov.s $f8, $f6\n"
+      "mov.s $f6, $f4\n"
+      "mov.s $f4, $f8\n";
+  DriverWrapper(moves_, expected, "TwoFpuRegistersFloat");
+}
+
+TEST_F(EmitSwapMipsTest, TwoFpuRegistersDouble) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::FpuRegisterLocation(6),
+      Primitive::kPrimDouble,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(6),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimDouble,
+      nullptr);
+  const char* expected =
+      "mov.d $f8, $f6\n"
+      "mov.d $f6, $f4\n"
+      "mov.d $f4, $f8\n";
+  DriverWrapper(moves_, expected, "TwoFpuRegistersDouble");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterAndFpuRegister) {
+  moves_->AddMove(
+      Location::RegisterLocation(4),
+      Location::FpuRegisterLocation(6),
+      Primitive::kPrimFloat,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(6),
+      Location::RegisterLocation(4),
+      Primitive::kPrimFloat,
+      nullptr);
+  const char* expected =
+      "or $t8, $a0, $zero\n"
+      "mfc1 $a0, $f6\n"
+      "mtc1 $t8, $f6\n";
+  DriverWrapper(moves_, expected, "RegisterAndFpuRegister");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterPairAndFpuRegister) {
+  moves_->AddMove(
+      Location::RegisterPairLocation(4, 5),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimDouble,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::RegisterPairLocation(4, 5),
+      Primitive::kPrimDouble,
+      nullptr);
+  const char* expected =
+      "mfc1 $t8, $f4\n"
+      "mfc1 $at, $f5\n"
+      "mtc1 $a0, $f4\n"
+      "mtc1 $a1, $f5\n"
+      "or $a0, $t8, $zero\n"
+      "or $a1, $at, $zero\n";
+  DriverWrapper(moves_, expected, "RegisterPairAndFpuRegister");
+}
+
+TEST_F(EmitSwapMipsTest, TwoStackSlots) {
+  moves_->AddMove(
+      Location::StackSlot(52),
+      Location::StackSlot(48),
+      Primitive::kPrimInt,
+      nullptr);
+  moves_->AddMove(
+      Location::StackSlot(48),
+      Location::StackSlot(52),
+      Primitive::kPrimInt,
+      nullptr);
+  const char* expected =
+      "addiu $sp, $sp, -4\n"
+      "sw $v0, 0($sp)\n"
+      "lw $v0, 56($sp)\n"
+      "lw $t8, 52($sp)\n"
+      "sw $v0, 52($sp)\n"
+      "sw $t8, 56($sp)\n"
+      "lw $v0, 0($sp)\n"
+      "addiu $sp, $sp, 4\n";
+  DriverWrapper(moves_, expected, "TwoStackSlots");
+}
+
+TEST_F(EmitSwapMipsTest, TwoDoubleStackSlots) {
+  moves_->AddMove(
+      Location::DoubleStackSlot(56),
+      Location::DoubleStackSlot(48),
+      Primitive::kPrimLong,
+      nullptr);
+  moves_->AddMove(
+      Location::DoubleStackSlot(48),
+      Location::DoubleStackSlot(56),
+      Primitive::kPrimLong,
+      nullptr);
+  const char* expected =
+      "addiu $sp, $sp, -4\n"
+      "sw $v0, 0($sp)\n"
+      "lw $v0, 60($sp)\n"
+      "lw $t8, 52($sp)\n"
+      "sw $v0, 52($sp)\n"
+      "sw $t8, 60($sp)\n"
+      "lw $v0, 64($sp)\n"
+      "lw $t8, 56($sp)\n"
+      "sw $v0, 56($sp)\n"
+      "sw $t8, 64($sp)\n"
+      "lw $v0, 0($sp)\n"
+      "addiu $sp, $sp, 4\n";
+  DriverWrapper(moves_, expected, "TwoDoubleStackSlots");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterAndStackSlot) {
+  moves_->AddMove(
+      Location::RegisterLocation(4),
+      Location::StackSlot(48),
+      Primitive::kPrimInt,
+      nullptr);
+  moves_->AddMove(
+      Location::StackSlot(48),
+      Location::RegisterLocation(4),
+      Primitive::kPrimInt,
+      nullptr);
+  const char* expected =
+      "or $t8, $a0, $zero\n"
+      "lw $a0, 48($sp)\n"
+      "sw $t8, 48($sp)\n";
+  DriverWrapper(moves_, expected, "RegisterAndStackSlot");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterPairAndDoubleStackSlot) {
+  moves_->AddMove(
+      Location::RegisterPairLocation(4, 5),
+      Location::DoubleStackSlot(32),
+      Primitive::kPrimLong,
+      nullptr);
+  moves_->AddMove(
+      Location::DoubleStackSlot(32),
+      Location::RegisterPairLocation(4, 5),
+      Primitive::kPrimLong,
+      nullptr);
+  const char* expected =
+      "or $t8, $a0, $zero\n"
+      "lw $a0, 32($sp)\n"
+      "sw $t8, 32($sp)\n"
+      "or $t8, $a1, $zero\n"
+      "lw $a1, 36($sp)\n"
+      "sw $t8, 36($sp)\n";
+  DriverWrapper(moves_, expected, "RegisterPairAndDoubleStackSlot");
+}
+
+TEST_F(EmitSwapMipsTest, FpuRegisterAndStackSlot) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::StackSlot(48),
+      Primitive::kPrimFloat,
+      nullptr);
+  moves_->AddMove(
+      Location::StackSlot(48),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimFloat,
+      nullptr);
+  const char* expected =
+      "mov.s $f8, $f4\n"
+      "lwc1 $f4, 48($sp)\n"
+      "swc1 $f8, 48($sp)\n";
+  DriverWrapper(moves_, expected, "FpuRegisterAndStackSlot");
+}
+
+TEST_F(EmitSwapMipsTest, FpuRegisterAndDoubleStackSlot) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::DoubleStackSlot(48),
+      Primitive::kPrimDouble,
+      nullptr);
+  moves_->AddMove(
+      Location::DoubleStackSlot(48),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimDouble,
+      nullptr);
+  const char* expected =
+      "mov.d $f8, $f4\n"
+      "ldc1 $f4, 48($sp)\n"
+      "sdc1 $f8, 48($sp)\n";
+  DriverWrapper(moves_, expected, "FpuRegisterAndDoubleStackSlot");
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc
new file mode 100644
index 0000000..9df5bf1
--- /dev/null
+++ b/compiler/optimizing/escape.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "escape.h"
+
+#include "nodes.h"
+
+namespace art {
+
+void CalculateEscape(HInstruction* reference,
+                     bool (*no_escape)(HInstruction*, HInstruction*),
+                     /*out*/ bool* is_singleton,
+                     /*out*/ bool* is_singleton_and_not_returned,
+                     /*out*/ bool* is_singleton_and_not_deopt_visible) {
+  // For references not allocated in the method, don't assume anything.
+  if (!reference->IsNewInstance() && !reference->IsNewArray()) {
+    *is_singleton = false;
+    *is_singleton_and_not_returned = false;
+    *is_singleton_and_not_deopt_visible = false;
+    return;
+  }
+  // Assume the best until proven otherwise.
+  *is_singleton = true;
+  *is_singleton_and_not_returned = true;
+  *is_singleton_and_not_deopt_visible = true;
+  // Visit all uses to determine if this reference can escape into the heap,
+  // a method call, an alias, etc.
+  for (const HUseListNode<HInstruction*>& use : reference->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (no_escape != nullptr && (*no_escape)(reference, user)) {
+      // Client supplied analysis says there is no escape.
+      continue;
+    } else if (user->IsBoundType() || user->IsNullCheck()) {
+      // BoundType shouldn't normally be necessary for an allocation. Just be conservative
+      // for the uncommon cases. Similarly, null checks are eventually eliminated for explicit
+      // allocations, but if we see one before it is simplified, assume an alias.
+      *is_singleton = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
+      return;
+    } else if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
+               (user->IsInstanceFieldSet() && (reference == user->InputAt(1))) ||
+               (user->IsUnresolvedInstanceFieldSet() && (reference == user->InputAt(1))) ||
+               (user->IsStaticFieldSet() && (reference == user->InputAt(1))) ||
+               (user->IsUnresolvedStaticFieldSet() && (reference == user->InputAt(0))) ||
+               (user->IsArraySet() && (reference == user->InputAt(2)))) {
+      // The reference is merged to HPhi/HSelect, passed to a callee, or stored to heap.
+      // Hence, the reference is no longer the only name that can refer to its value.
+      *is_singleton = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
+      return;
+    } else if ((user->IsUnresolvedInstanceFieldGet() && (reference == user->InputAt(0))) ||
+               (user->IsUnresolvedInstanceFieldSet() && (reference == user->InputAt(0)))) {
+      // The field is accessed in an unresolved way. We mark the object as a non-singleton.
+      // Note that we could optimize this case and still perform some optimizations until
+      // we hit the unresolved access, but the conservative assumption is the simplest.
+      *is_singleton = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
+      return;
+    } else if (user->IsReturn()) {
+      *is_singleton_and_not_returned = false;  // Keep scanning; is_singleton is unaffected.
+    }
+  }
+
+  // Look at the environment uses if it's for HDeoptimize. Other environment uses are fine,
+  // as long as client optimizations that rely on this information are disabled for debuggable.
+  for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
+    HEnvironment* user = use.GetUser();
+    if (user->GetHolder()->IsDeoptimize()) {
+      *is_singleton_and_not_deopt_visible = false;
+      break;  // One HDeoptimize holder is enough to clear the flag.
+    }
+  }
+}
+
+bool DoesNotEscape(HInstruction* reference, bool (*no_escape)(HInstruction*, HInstruction*)) {
+  // CalculateEscape() requires all three out-parameters, but only the
+  // "singleton and not returned" verdict is reported to the caller.
+  // Visibility to HDeoptimize is not relevant for escape, so that flag
+  // is computed and then dropped.
+  bool singleton = false;
+  bool not_returned = false;
+  bool not_deopt_visible = false;
+  CalculateEscape(reference, no_escape, &singleton, &not_returned, &not_deopt_visible);
+  return not_returned;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h
new file mode 100644
index 0000000..75e37b0
--- /dev/null
+++ b/compiler/optimizing/escape.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_ESCAPE_H_
+#define ART_COMPILER_OPTIMIZING_ESCAPE_H_
+
+namespace art {
+
+class HInstruction;
+
+/*
+ * Methods related to escape analysis, i.e. determining whether an object
+ * allocation is visible outside ('escapes') its immediate method context.
+ */
+
+/*
+ * Performs escape analysis on the given instruction, typically a reference to an
+ * allocation. The method assigns true to parameter 'is_singleton' if the reference
+ * is the only name that can refer to its value during the lifetime of the method,
+ * meaning that the reference is not aliased with something else, is not stored to
+ * heap memory, and not passed to another method. In addition, the method assigns
+ * true to parameter 'is_singleton_and_not_returned' if the reference is a singleton
+ * and not returned to the caller and to parameter 'is_singleton_and_not_deopt_visible'
+ * if the reference is a singleton and not used as an environment local of an
+ * HDeoptimize instruction (clients of the final value must run after BCE to ensure
+ * all such instructions have been introduced already).
+ *
+ * Note that being visible to a HDeoptimize instruction does not count for ordinary
+ * escape analysis, since switching between compiled code and interpreted code keeps
+ * non escaping references restricted to the lifetime of the method and the thread
+ * executing it. This property only concerns optimizations that are interested in
+ * escape analysis with respect to the *compiled* code (such as LSE).
+ *
+ * When set, the no_escape function is applied to any use of the allocation instruction
+ * prior to any built-in escape analysis. This allows clients to define better escape
+ * analysis in certain case-specific circumstances. If 'no_escape(reference, user)'
+ * returns true, the user is assumed *not* to cause any escape right away. The return
+ * value false means the client cannot provide a definite answer and built-in escape
+ * analysis is applied to the user instead.
+ */
+void CalculateEscape(HInstruction* reference,
+                     bool (*no_escape)(HInstruction*, HInstruction*),
+                     /*out*/ bool* is_singleton,
+                     /*out*/ bool* is_singleton_and_not_returned,
+                     /*out*/ bool* is_singleton_and_not_deopt_visible);
+
+/*
+ * Convenience method for testing the singleton and not returned properties at once.
+ * Callers should be aware that this method invokes the full analysis at each call.
+ */
+bool DoesNotEscape(HInstruction* reference, bool (*no_escape)(HInstruction*, HInstruction*));
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_ESCAPE_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c8cba20..188ee3a 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -23,7 +23,6 @@
 #include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
-#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -448,7 +447,6 @@
 
   // Ensure that reference type instructions have reference type info.
   if (instruction->GetType() == Primitive::kPrimNot) {
-    ScopedObjectAccess soa(Thread::Current());
     if (!instruction->GetReferenceTypeInfo().IsValid()) {
       AddError(StringPrintf("Reference type instruction %s:%d does not have "
                             "valid reference type information.",
@@ -1011,7 +1009,6 @@
 void GraphChecker::VisitBoundType(HBoundType* instruction) {
   VisitInstruction(instruction);
 
-  ScopedObjectAccess soa(Thread::Current());
   if (!instruction->GetUpperBound().IsValid()) {
     AddError(StringPrintf(
         "%s %d does not have a valid upper bound RTI.",
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 6abf00e..437d35c 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -35,7 +35,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
-                                                             0,
+                                                             dex::TypeIndex(0),
                                                              0,
                                                              Primitive::kPrimNot);
   entry->AddInstruction(parameter);
@@ -120,7 +120,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
-                                                             0,
+                                                             dex::TypeIndex(0),
                                                              0,
                                                              Primitive::kPrimNot);
   entry->AddInstruction(parameter);
@@ -204,7 +204,7 @@
   graph->SetEntryBlock(entry);
 
   HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
-                                                             0,
+                                                             dex::TypeIndex(0),
                                                              0,
                                                              Primitive::kPrimNot);
   entry->AddInstruction(parameter);
@@ -352,7 +352,7 @@
   inner_loop_exit->AddSuccessor(outer_loop_header);
 
   HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
-                                                             0,
+                                                             dex::TypeIndex(0),
                                                              0,
                                                              Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 031f1d7..3425b88 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -80,7 +80,7 @@
 
     // Provide entry and exit instructions.
     parameter_ = new (&allocator_) HParameterValue(
-        graph_->GetDexFile(), 0, 0, Primitive::kPrimNot, true);
+        graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot, true);
     entry_->AddInstruction(parameter_);
     constant0_ = graph_->GetIntConstant(0);
     constant1_ = graph_->GetIntConstant(1);
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 7cc8b1e..235793d 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -58,22 +58,90 @@
 }
 
 /**
- * An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as length + b
- * because length >= 0 is true. This makes it more likely the bound is useful to clients.
+ * Detects an instruction that is >= 0. As long as the value is carried by
+ * a single instruction, arithmetic wrap-around cannot occur.
  */
-static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
-  int64_t value;
-  if (v.is_known &&
-      v.a_constant >= 1 &&
-      v.instruction->IsDiv() &&
-      v.instruction->InputAt(0)->IsArrayLength() &&
-      IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
-    return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+static bool IsGEZero(HInstruction* instruction) {
+  DCHECK(instruction != nullptr);
+  if (instruction->IsArrayLength()) {
+    return true;
+  } else if (instruction->IsInvokeStaticOrDirect()) {
+    switch (instruction->AsInvoke()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+        // Instruction MIN(>=0, >=0) is >= 0.
+        return IsGEZero(instruction->InputAt(0)) &&
+               IsGEZero(instruction->InputAt(1));
+      case Intrinsics::kMathAbsInt:
+      case Intrinsics::kMathAbsLong:
+        // Instruction ABS(x) is >= 0.
+        return true;
+      default:
+        break;
+    }
+  }
+  int64_t value = -1;
+  return IsIntAndGet(instruction, &value) && value >= 0;
+}
+
+/** Hunts "under the hood" for a suitable instruction at the hint. */
+static bool IsMaxAtHint(
+    HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
+  if (instruction->IsInvokeStaticOrDirect()) {
+    switch (instruction->AsInvoke()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+        // For MIN(x, y), return most suitable x or y as maximum.
+        return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+               IsMaxAtHint(instruction->InputAt(1), hint, suitable);
+      default:
+        break;
+    }
+  } else {
+    *suitable = instruction;
+    while (instruction->IsArrayLength() ||
+           instruction->IsNullCheck() ||
+           instruction->IsNewArray()) {
+      instruction = instruction->InputAt(0);
+    }
+    return instruction == hint;
+  }
+  return false;
+}
+
+/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
+static InductionVarRange::Value SimplifyMin(InductionVarRange::Value v) {
+  if (v.is_known && v.a_constant == 1 && v.b_constant <= 0) {
+    // If a == 1, instruction >= 0 and b <= 0, just return the constant b.
+    // No arithmetic wrap-around can occur.
+    if (IsGEZero(v.instruction)) {
+      return InductionVarRange::Value(v.b_constant);
+    }
   }
   return v;
 }
 
-/** Helper method to test for a constant value. */
+/** Post-analysis simplification of a maximum value that makes the bound more useful to clients. */
+static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruction* hint) {
+  if (v.is_known && v.a_constant >= 1) {
+    // An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as
+    // length + b because length >= 0 is true.
+    int64_t value;
+    if (v.instruction->IsDiv() &&
+        v.instruction->InputAt(0)->IsArrayLength() &&
+        IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+      return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+    }
+    // If a == 1, the most suitable one suffices as maximum value.
+    HInstruction* suitable = nullptr;
+    if (v.a_constant == 1 && IsMaxAtHint(v.instruction, hint, &suitable)) {
+      return InductionVarRange::Value(suitable, 1, v.b_constant);
+    }
+  }
+  return v;
+}
+
+/** Tests for a constant value. */
 static bool IsConstantValue(InductionVarRange::Value v) {
   return v.is_known && v.a_constant == 0;
 }
@@ -97,7 +165,7 @@
   }
 }
 
-/** Helper method to insert an instruction. */
+/** Inserts an instruction. */
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
   DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
@@ -106,7 +174,7 @@
   return instruction;
 }
 
-/** Helper method to obtain loop's control instruction. */
+/** Obtains loop's control instruction. */
 static HInstruction* GetLoopControl(HLoopInformation* loop) {
   DCHECK(loop != nullptr);
   return loop->GetHeader()->GetLastInstruction();
@@ -150,9 +218,14 @@
   chase_hint_ = chase_hint;
   bool in_body = context->GetBlock() != loop->GetHeader();
   int64_t stride_value = 0;
-  *min_val = GetVal(info, trip, in_body, /* is_min */ true);
-  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+  *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint);
   *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip);
+  chase_hint_ = nullptr;
+  // Retry chasing constants for wrap-around (merge sensitive).
+  if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) {
+    *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+  }
   return true;
 }
 
@@ -175,7 +248,7 @@
                                   needs_taken_test)
       && (stride_value == -1 ||
           stride_value == 0 ||
-          stride_value == 1);  // avoid wrap-around anomalies.
+          stride_value == 1);  // avoid arithmetic wrap-around anomalies.
 }
 
 void InductionVarRange::GenerateRange(HInstruction* context,
@@ -302,7 +375,8 @@
         return true;
       }
     }
-    // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies.
+    // Try range analysis on the invariant, only accept a proper range
+    // to avoid arithmetic wrap-around anomalies.
     Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
     Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
     if (IsConstantValue(min_val) &&
@@ -450,25 +524,26 @@
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
                                                      bool is_min) const {
-  // Stop chasing the instruction at constant or hint.
-  int64_t value;
-  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
-    return Value(static_cast<int32_t>(value));
-  } else if (instruction == chase_hint_) {
-    return Value(instruction, 1, 0);
-  }
-  // Special cases when encountering a single instruction that denotes trip count in the
-  // loop-body: min is 1 and, when chasing constants, max of safe trip-count is max int
-  if (in_body && trip != nullptr && instruction == trip->op_a->fetch) {
+  // Special case when chasing constants: a single instruction that denotes the trip count in
+  // the loop-body is at least 1 and, for a safe trip-count, at most max int.
+  if (chase_hint_ == nullptr && in_body && trip != nullptr && instruction == trip->op_a->fetch) {
     if (is_min) {
       return Value(1);
-    } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) {
+    } else if (!IsUnsafeTripCount(trip)) {
       return Value(std::numeric_limits<int32_t>::max());
     }
   }
-  // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely
-  // range analysis will compare the same instructions as terminal nodes.
-  if (instruction->IsAdd()) {
+  // Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
+  // it becomes more likely range analysis will compare the same instructions as terminal nodes.
+  int64_t value;
+  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+    // Proper constant reveals best information.
+    return Value(static_cast<int32_t>(value));
+  } else if (instruction == chase_hint_) {
+    // At hint, fetch is represented by itself.
+    return Value(instruction, 1, 0);
+  } else if (instruction->IsAdd()) {
+    // Incorporate suitable constants in the chased value.
     if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(Value(static_cast<int32_t>(value)),
                       GetFetch(instruction->InputAt(1), trip, in_body, is_min));
@@ -477,14 +552,14 @@
                       Value(static_cast<int32_t>(value)));
     }
   } else if (instruction->IsArrayLength()) {
-    // Return extreme values when chasing constants. Otherwise, chase deeper.
+    // Exploit length properties when chasing constants or chase into a new array declaration.
     if (chase_hint_ == nullptr) {
       return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max());
     } else if (instruction->InputAt(0)->IsNewArray()) {
       return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
     }
   } else if (instruction->IsTypeConversion()) {
-    // Since analysis is 32-bit (or narrower) we allow a widening along the path.
+    // Since analysis is 32-bit (or narrower), chase beyond widening along the path.
     if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
         instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
       return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
@@ -506,6 +581,7 @@
       !IsUnsafeTripCount(next_trip)) {
     return GetVal(next_info, next_trip, next_in_body, is_min);
   }
+  // Fetch is represented by itself.
   return Value(instruction, 1, 0);
 }
 
@@ -870,10 +946,11 @@
     HInstruction* opb = nullptr;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
-        // Invariants.
+        // Invariants (note that even though is_min does not impact code generation for
+        // invariants, some effort is made to keep this parameter consistent).
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
-          case HInductionVarAnalysis::kXor:
+          case HInductionVarAnalysis::kXor:  // no proper is_min for second arg
           case HInductionVarAnalysis::kLT:
           case HInductionVarAnalysis::kLE:
           case HInductionVarAnalysis::kGT:
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 8bbdd4a..4c99e3c 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -62,9 +62,15 @@
     graph_->SetEntryBlock(entry_block_);
     graph_->SetExitBlock(exit_block_);
     // Two parameters.
-    x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                           dex::TypeIndex(0),
+                                           0,
+                                           Primitive::kPrimInt);
     entry_block_->AddInstruction(x_);
-    y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                           dex::TypeIndex(0),
+                                           0,
+                                           Primitive::kPrimInt);
     entry_block_->AddInstruction(y_);
     // Set arbitrary range analysis hint while testing private methods.
     SetHint(x_);
@@ -572,7 +578,8 @@
   HInstruction* new_array = new (&allocator_)
       HNewArray(x_,
                 graph_->GetCurrentMethod(),
-                0, Primitive::kPrimInt,
+                0,
+                dex::TypeIndex(Primitive::kPrimInt),
                 graph_->GetDexFile(),
                 kQuickAllocArray);
   entry_block_->AddInstruction(new_array);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index cc420b3..8d93867 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -197,15 +197,15 @@
   }
 }
 
-static uint32_t FindClassIndexIn(mirror::Class* cls,
-                                 const DexFile& dex_file,
-                                 Handle<mirror::DexCache> dex_cache)
+static dex::TypeIndex FindClassIndexIn(mirror::Class* cls,
+                                       const DexFile& dex_file,
+                                       Handle<mirror::DexCache> dex_cache)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  uint32_t index = DexFile::kDexNoIndex;
+  dex::TypeIndex index;
   if (cls->GetDexCache() == nullptr) {
     DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
     index = cls->FindTypeIndexInOtherDexFile(dex_file);
-  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+  } else if (!cls->GetDexTypeIndex().IsValid()) {
     DCHECK(cls->IsProxyClass()) << cls->PrettyClass();
     // TODO: deal with proxy classes.
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
@@ -223,8 +223,8 @@
     // We cannot guarantee the entry in the dex cache will resolve to the same class,
     // as there may be different class loaders. So only return the index if it's
     // the right class in the dex cache already.
-    if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
-      index = DexFile::kDexNoIndex;
+    if (index.IsValid() && dex_cache->GetResolvedType(index) != cls) {
+      index = dex::TypeIndex::Invalid();
     }
   }
 
@@ -258,6 +258,55 @@
   ProfilingInfo* const profiling_info_;
 };
 
+static bool IsMonomorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_GE(InlineCache::kIndividualCacheSize, 2);
+  return classes->Get(0) != nullptr && classes->Get(1) == nullptr;
+}
+
+static bool IsMegamorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (classes->Get(i) == nullptr) {
+      return false;
+    }
+  }
+  return true;
+}
+
+static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(classes->Get(0) != nullptr);
+  return classes->Get(0);
+}
+
+static bool IsUninitialized(Handle<mirror::ObjectArray<mirror::Class>> classes)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return classes->Get(0) == nullptr;
+}
+
+static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_GE(InlineCache::kIndividualCacheSize, 3);
+  return classes->Get(1) != nullptr &&
+      classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr;
+}
+
+ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
+  if (!resolved_method->HasSingleImplementation()) {
+    return nullptr;
+  }
+  if (Runtime::Current()->IsAotCompiler()) {
+    // No CHA-based devirtualization for AOT compiler (yet).
+    return nullptr;
+  }
+  if (outermost_graph_->IsCompilingOsr()) {
+    // We do not support HDeoptimize in OSR methods.
+    return nullptr;
+  }
+  return resolved_method->GetSingleImplementation();
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -283,10 +332,29 @@
     actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
   }
 
+  bool cha_devirtualize = false;
+  if (actual_method == nullptr) {
+    ArtMethod* method = TryCHADevirtualization(resolved_method);
+    if (method != nullptr) {
+      cha_devirtualize = true;
+      actual_method = method;
+    }
+  }
+
   if (actual_method != nullptr) {
-    bool result = TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
+    bool result = TryInlineAndReplace(invoke_instruction,
+                                      actual_method,
+                                      /* do_rtp */ true,
+                                      cha_devirtualize);
     if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
-      MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+      if (cha_devirtualize) {
+        // Add dependency due to devirtualization. We've assumed resolved_method
+        // has a single implementation.
+        outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
+        MaybeRecordStat(kCHAInline);
+      } else {
+        MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+      }
     }
     return result;
   }
@@ -301,31 +369,48 @@
     ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
     ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
     if (profiling_info != nullptr) {
-      const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
-      if (ic.IsUninitialized()) {
-        VLOG(compiler) << "Interface or virtual call to "
-                       << caller_dex_file.PrettyMethod(method_index)
-                       << " is not hit and not inlined";
+      StackHandleScope<1> hs(soa.Self());
+      ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+      Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs.NewHandle(
+          mirror::ObjectArray<mirror::Class>::Alloc(
+              soa.Self(),
+              class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+              InlineCache::kIndividualCacheSize));
+      if (inline_cache.Get() == nullptr) {
+        // We got an OOME. Just clear the exception, and don't inline.
+        DCHECK(soa.Self()->IsExceptionPending());
+        soa.Self()->ClearException();
+        VLOG(compiler) << "Out of memory in the compiler when trying to inline";
         return false;
-      } else if (ic.IsMonomorphic()) {
-        MaybeRecordStat(kMonomorphicCall);
-        if (outermost_graph_->IsCompilingOsr()) {
-          // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
-          // interpreter and it may have seen different receiver types.
-          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
-        } else {
-          return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
-        }
-      } else if (ic.IsPolymorphic()) {
-        MaybeRecordStat(kPolymorphicCall);
-        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
       } else {
-        DCHECK(ic.IsMegamorphic());
-        VLOG(compiler) << "Interface or virtual call to "
-                       << caller_dex_file.PrettyMethod(method_index)
-                       << " is megamorphic and not inlined";
-        MaybeRecordStat(kMegamorphicCall);
-        return false;
+        Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
+            *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
+            inline_cache);
+        if (IsUninitialized(inline_cache)) {
+          VLOG(compiler) << "Interface or virtual call to "
+                         << caller_dex_file.PrettyMethod(method_index)
+                         << " is not hit and not inlined";
+          return false;
+        } else if (IsMonomorphic(inline_cache)) {
+          MaybeRecordStat(kMonomorphicCall);
+          if (outermost_graph_->IsCompilingOsr()) {
+            // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
+            // interpreter and it may have seen different receiver types.
+            return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+          } else {
+            return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
+          }
+        } else if (IsPolymorphic(inline_cache)) {
+          MaybeRecordStat(kPolymorphicCall);
+          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+        } else {
+          DCHECK(IsMegamorphic(inline_cache));
+          VLOG(compiler) << "Interface or virtual call to "
+                         << caller_dex_file.PrettyMethod(method_index)
+                         << " is megamorphic and not inlined";
+          MaybeRecordStat(kMegamorphicCall);
+          return false;
+        }
       }
     }
   }
@@ -358,14 +443,14 @@
 
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
-                                        const InlineCache& ic) {
+                                        Handle<mirror::ObjectArray<mirror::Class>> classes) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
 
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  uint32_t class_index = FindClassIndexIn(
-      ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
-  if (class_index == DexFile::kDexNoIndex) {
+  dex::TypeIndex class_index = FindClassIndexIn(
+      GetMonomorphicType(classes), caller_dex_file, caller_compilation_unit_.GetDexCache());
+  if (!class_index.IsValid()) {
     VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
                    << " from inline cache is not inlined because its class is not"
                    << " accessible to the caller";
@@ -375,11 +460,11 @@
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   PointerSize pointer_size = class_linker->GetImagePointerSize();
   if (invoke_instruction->IsInvokeInterface()) {
-    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+    resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForInterface(
         resolved_method, pointer_size);
   } else {
     DCHECK(invoke_instruction->IsInvokeVirtual());
-    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+    resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
         resolved_method, pointer_size);
   }
   DCHECK(resolved_method != nullptr);
@@ -387,13 +472,16 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction,
+                           resolved_method,
+                           /* do_rtp */ false,
+                           /* cha_devirtualize */ false)) {
     return false;
   }
 
   // We successfully inlined, now add a guard.
   bool is_referrer =
-      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+      (GetMonomorphicType(classes) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
   AddTypeGuard(receiver,
                cursor,
                bb_cursor,
@@ -414,10 +502,29 @@
   return true;
 }
 
+void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
+                           uint32_t dex_pc,
+                           HInstruction* cursor,
+                           HBasicBlock* bb_cursor) {
+  HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc);
+  HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual(
+      deopt_flag, graph_->GetIntConstant(0, dex_pc));
+  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc);
+
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag);
+  bb_cursor->InsertInstructionAfter(deopt, should_deopt);
+  deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+}
+
 HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
                                      HInstruction* cursor,
                                      HBasicBlock* bb_cursor,
-                                     uint32_t class_index,
+                                     dex::TypeIndex class_index,
                                      bool is_referrer,
                                      HInstruction* invoke_instruction,
                                      bool with_deoptimization) {
@@ -457,11 +564,11 @@
 
 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
-                                        const InlineCache& ic) {
+                                        Handle<mirror::ObjectArray<mirror::Class>> classes) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
 
-  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, classes)) {
     return true;
   }
 
@@ -472,16 +579,16 @@
   bool all_targets_inlined = true;
   bool one_target_inlined = false;
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    if (ic.GetTypeAt(i) == nullptr) {
+    if (classes->Get(i) == nullptr) {
       break;
     }
     ArtMethod* method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+      method = classes->Get(i)->FindVirtualMethodForInterface(
           resolved_method, pointer_size);
     } else {
       DCHECK(invoke_instruction->IsInvokeVirtual());
-      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+      method = classes->Get(i)->FindVirtualMethodForVirtual(
           resolved_method, pointer_size);
     }
 
@@ -489,21 +596,21 @@
     HInstruction* cursor = invoke_instruction->GetPrevious();
     HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-    uint32_t class_index = FindClassIndexIn(
-        ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+    dex::TypeIndex class_index = FindClassIndexIn(
+        classes->Get(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
     HInstruction* return_replacement = nullptr;
-    if (class_index == DexFile::kDexNoIndex ||
+    if (!class_index.IsValid() ||
         !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
       all_targets_inlined = false;
     } else {
       one_target_inlined = true;
-      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+      bool is_referrer = (classes->Get(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
 
       // If we have inlined all targets before, and this receiver is the last seen,
       // we deoptimize instead of keeping the original invoke instruction.
       bool deoptimize = all_targets_inlined &&
           (i != InlineCache::kIndividualCacheSize - 1) &&
-          (ic.GetTypeAt(i + 1) == nullptr);
+          (classes->Get(i + 1) == nullptr);
 
       if (outermost_graph_->IsCompilingOsr()) {
         // We do not support HDeoptimize in OSR methods.
@@ -618,9 +725,10 @@
       merge, original_invoke_block, /* replace_if_back_edge */ true);
 }
 
-bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
-                                                    ArtMethod* resolved_method,
-                                                    const InlineCache& ic) {
+bool HInliner::TryInlinePolymorphicCallToSameTarget(
+    HInvoke* invoke_instruction,
+    ArtMethod* resolved_method,
+    Handle<mirror::ObjectArray<mirror::Class>> classes) {
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJitCompilation());
   if (graph_->GetInstructionSet() == kMips64) {
@@ -639,12 +747,12 @@
   // Check whether we are actually calling the same method among
   // the different types seen.
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    if (ic.GetTypeAt(i) == nullptr) {
+    if (classes->Get(i) == nullptr) {
       break;
     }
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
+      new_method = classes->Get(i)->GetImt(pointer_size)->Get(
           method_index, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
@@ -653,7 +761,7 @@
       }
     } else {
       DCHECK(invoke_instruction->IsInvokeVirtual());
-      new_method = ic.GetTypeAt(i)->GetEmbeddedVTableEntry(method_index, pointer_size);
+      new_method = classes->Get(i)->GetEmbeddedVTableEntry(method_index, pointer_size);
     }
     DCHECK(new_method != nullptr);
     if (actual_method == nullptr) {
@@ -735,8 +843,14 @@
   return true;
 }
 
-bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
+                                   ArtMethod* method,
+                                   bool do_rtp,
+                                   bool cha_devirtualize) {
   HInstruction* return_replacement = nullptr;
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
   if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
     if (invoke_instruction->IsInvokeInterface()) {
       // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
@@ -774,6 +888,9 @@
       return false;
     }
   }
+  if (cha_devirtualize) {
+    AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
+  }
   if (return_replacement != nullptr) {
     invoke_instruction->ReplaceWith(return_replacement);
   }
@@ -1226,12 +1343,22 @@
 
   // Skip the entry block, it does not contain instructions that prevent inlining.
   for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
-    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
-      // Don't inline methods with irreducible loops, they could prevent some
-      // optimizations to run.
-      VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                     << " could not be inlined because it contains an irreducible loop";
-      return false;
+    if (block->IsLoopHeader()) {
+      if (block->GetLoopInformation()->IsIrreducible()) {
+        // Don't inline methods with irreducible loops, they could prevent some
+        // optimizations to run.
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
+                       << " could not be inlined because it contains an irreducible loop";
+        return false;
+      }
+      if (!block->GetLoopInformation()->HasExitEdge()) {
+        // Don't inline methods with loops without exit, since they cause the
+        // loop information to be computed incorrectly when updating after
+        // inlining.
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
+                       << " could not be inlined because it contains a loop with no exit";
+        return false;
+      }
     }
 
     for (HInstructionIterator instr_it(block->GetInstructions());
@@ -1315,8 +1442,8 @@
                                   const DexCompilationUnit& dex_compilation_unit) {
   // Note: if the outermost_graph_ is being compiled OSR, we should not run any
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
-  HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
+  HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+  HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, stats_);
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a1dcd58..ffebd97 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
 #define ART_COMPILER_OPTIMIZING_INLINER_H_
 
+#include "dex_file_types.h"
 #include "invoke_type.h"
 #include "optimization.h"
 
@@ -27,7 +28,6 @@
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
-class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
@@ -62,8 +62,12 @@
 
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
   // reference type propagation can run after the inlining. If the inlining is successful, this
-  // method will replace and remove the `invoke_instruction`.
-  bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
+  // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
+  // a CHA guard needs to be added for the inlining.
+  bool TryInlineAndReplace(HInvoke* invoke_instruction,
+                           ArtMethod* resolved_method,
+                           bool do_rtp,
+                           bool cha_devirtualize)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool TryBuildAndInline(HInvoke* invoke_instruction,
@@ -104,20 +108,32 @@
   // ... // inlined code
   bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                 ArtMethod* resolved_method,
-                                const InlineCache& ic)
+                                Handle<mirror::ObjectArray<mirror::Class>> classes)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Try to inline targets of a polymorphic call.
   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                 ArtMethod* resolved_method,
-                                const InlineCache& ic)
+                                Handle<mirror::ObjectArray<mirror::Class>> classes)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
                                             ArtMethod* resolved_method,
-                                            const InlineCache& ic)
+                                            Handle<mirror::ObjectArray<mirror::Class>> classes)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Try CHA-based devirtualization to change virtual method calls into
+  // direct calls.
+  // Returns the actual method that resolved_method can be devirtualized to.
+  ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
+  // should_deoptimize flag and if it's true, does deoptimization.
+  void AddCHAGuard(HInstruction* invoke_instruction,
+                   uint32_t dex_pc,
+                   HInstruction* cursor,
+                   HBasicBlock* bb_cursor);
 
   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                            HInstruction* receiver,
@@ -150,7 +166,7 @@
   HInstruction* AddTypeGuard(HInstruction* receiver,
                              HInstruction* cursor,
                              HBasicBlock* bb_cursor,
-                             uint32_t class_index,
+                             dex::TypeIndex class_index,
                              bool is_referrer,
                              HInstruction* invoke_instruction,
                              bool with_deoptimization)
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c8c4ca7..b97581b 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -908,7 +908,7 @@
                       false /* is_unresolved */);
 }
 
-bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
@@ -1004,7 +1004,7 @@
   Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
   // The index at which the method's class is stored in the DexCache's type array.
-  uint32_t storage_index = DexFile::kDexNoIndex;
+  dex::TypeIndex storage_index;
   bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
   if (is_outer_class) {
     storage_index = outer_class->GetDexTypeIndex();
@@ -1021,7 +1021,7 @@
 
   if (IsInitialized(resolved_method_class)) {
     *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-  } else if (storage_index != DexFile::kDexNoIndex) {
+  } else if (storage_index.IsValid()) {
     *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
     HLoadClass* load_class = new (arena_) HLoadClass(
         graph_->GetCurrentMethod(),
@@ -1297,7 +1297,7 @@
   return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
 }
 
-bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
+bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) const {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<3> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
@@ -1360,7 +1360,7 @@
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the field's class is stored in the DexCache's type array.
-  uint32_t storage_index;
+  dex::TypeIndex storage_index;
   bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
   if (is_outer_class) {
     storage_index = outer_class->GetDexTypeIndex();
@@ -1497,7 +1497,7 @@
 }
 
 void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
-                                              uint32_t type_index,
+                                              dex::TypeIndex type_index,
                                               uint32_t number_of_vreg_arguments,
                                               bool is_range,
                                               uint32_t* args,
@@ -1644,7 +1644,7 @@
 void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
                                          uint8_t destination,
                                          uint8_t reference,
-                                         uint16_t type_index,
+                                         dex::TypeIndex type_index,
                                          uint32_t dex_pc) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<1> hs(soa.Self());
@@ -1684,14 +1684,14 @@
   }
 }
 
-bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index,
+bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index,
                                            Handle<mirror::DexCache> dex_cache,
                                            bool* finalizable) const {
   return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
       dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index, finalizable);
 }
 
-bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, bool* finalizable) const {
   ScopedObjectAccess soa(Thread::Current());
   Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
   return NeedsAccessCheck(type_index, dex_cache, finalizable);
@@ -1723,7 +1723,10 @@
     if (dex_pc_in_map == dex_pc) {
       return value_in_map;
     } else {
-      skipped_interpreter_metadata_.Put(dex_pc_in_map, value_in_map);
+      // Overwrite and not Put, as quickened CHECK-CAST has two entries with
+      // the same dex_pc. This is OK, because the compiler does not care about those
+      // entries.
+      skipped_interpreter_metadata_.Overwrite(dex_pc_in_map, value_in_map);
     }
   }
 }
@@ -2446,7 +2449,7 @@
     }
 
     case Instruction::NEW_INSTANCE: {
-      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+      if (!BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc)) {
         return false;
       }
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
@@ -2454,7 +2457,7 @@
     }
 
     case Instruction::NEW_ARRAY: {
-      uint16_t type_index = instruction.VRegC_22c();
+      dex::TypeIndex type_index(instruction.VRegC_22c());
       HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
       bool finalizable;
       QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
@@ -2472,7 +2475,7 @@
 
     case Instruction::FILLED_NEW_ARRAY: {
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t type_index = instruction.VRegB_35c();
+      dex::TypeIndex type_index(instruction.VRegB_35c());
       uint32_t args[5];
       instruction.GetVarArgs(args);
       BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
@@ -2481,7 +2484,7 @@
 
     case Instruction::FILLED_NEW_ARRAY_RANGE: {
       uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t type_index = instruction.VRegB_3rc();
+      dex::TypeIndex type_index(instruction.VRegB_3rc());
       uint32_t register_index = instruction.VRegC_3rc();
       BuildFilledNewArray(
           dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
@@ -2622,7 +2625,7 @@
     }
 
     case Instruction::CONST_STRING: {
-      uint32_t string_index = instruction.VRegB_21c();
+      dex::StringIndex string_index(instruction.VRegB_21c());
       AppendInstruction(
           new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
@@ -2630,7 +2633,7 @@
     }
 
     case Instruction::CONST_STRING_JUMBO: {
-      uint32_t string_index = instruction.VRegB_31c();
+      dex::StringIndex string_index(instruction.VRegB_31c());
       AppendInstruction(
           new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
@@ -2638,7 +2641,7 @@
     }
 
     case Instruction::CONST_CLASS: {
-      uint16_t type_index = instruction.VRegB_21c();
+      dex::TypeIndex type_index(instruction.VRegB_21c());
       // `CanAccessTypeWithoutChecks` will tell whether the method being
       // built is trying to access its own class, so that the generated
       // code can optimize for this case. However, the optimization does not
@@ -2679,14 +2682,14 @@
     case Instruction::INSTANCE_OF: {
       uint8_t destination = instruction.VRegA_22c();
       uint8_t reference = instruction.VRegB_22c();
-      uint16_t type_index = instruction.VRegC_22c();
+      dex::TypeIndex type_index(instruction.VRegC_22c());
       BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
       break;
     }
 
     case Instruction::CHECK_CAST: {
       uint8_t reference = instruction.VRegA_21c();
-      uint16_t type_index = instruction.VRegB_21c();
+      dex::TypeIndex type_index(instruction.VRegB_21c());
       BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
       break;
     }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index aa34ddd..f29e522 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -20,6 +20,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "block_builder.h"
+#include "dex_file_types.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/dex_compilation_unit.h"
@@ -100,11 +101,11 @@
 
   // Returns whether the current method needs access check for the type.
   // Output parameter finalizable is set to whether the type is finalizable.
-  bool NeedsAccessCheck(uint32_t type_index,
+  bool NeedsAccessCheck(dex::TypeIndex type_index,
                         Handle<mirror::DexCache> dex_cache,
                         /*out*/bool* finalizable) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
+  bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const;
 
   template<typename T>
   void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
@@ -176,7 +177,7 @@
 
   // Builds a new array node and the instructions that fill it.
   void BuildFilledNewArray(uint32_t dex_pc,
-                           uint32_t type_index,
+                           dex::TypeIndex type_index,
                            uint32_t number_of_vreg_arguments,
                            bool is_range,
                            uint32_t* args,
@@ -205,7 +206,7 @@
   void BuildTypeCheck(const Instruction& instruction,
                       uint8_t destination,
                       uint8_t reference,
-                      uint16_t type_index,
+                      dex::TypeIndex type_index,
                       uint32_t dex_pc);
 
   // Builds an instruction sequence for a switch statement.
@@ -218,7 +219,7 @@
   mirror::Class* GetCompilingClass() const;
 
   // Returns whether `type_index` points to the outer-most compiling method's class.
-  bool IsOutermostCompilingClass(uint16_t type_index) const;
+  bool IsOutermostCompilingClass(dex::TypeIndex type_index) const;
 
   void PotentiallySimplifyFakeString(uint16_t original_dex_register,
                                      uint32_t dex_pc,
@@ -258,7 +259,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Build a HNewInstance instruction.
-  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+  bool BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc);
 
   // Return whether the compiler can assume `cls` is initialized.
   bool IsInitialized(Handle<mirror::Class> cls) const
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e4d280f..658b804 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,6 +16,7 @@
 
 #include "instruction_simplifier.h"
 
+#include "escape.h"
 #include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
@@ -106,14 +107,19 @@
   void SimplifyFP2Int(HInvoke* invoke);
   void SimplifyStringCharAt(HInvoke* invoke);
   void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
+  void SimplifyNPEOnArgN(HInvoke* invoke, size_t);
+  void SimplifyReturnThis(HInvoke* invoke);
+  void SimplifyAllocationIntrinsic(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
-  // We ensure we do not loop infinitely. The value is a finger in the air guess
-  // that should allow enough simplification.
-  static constexpr int kMaxSamePositionSimplifications = 10;
+  // We ensure we do not loop infinitely. The value should not be too high, since that
+  // would allow looping around the same basic block too many times. The value should
+  // not be too low either, however, since we want to allow revisiting a basic block
+  // with many statements and simplifications at least once.
+  static constexpr int kMaxSamePositionSimplifications = 50;
 };
 
 void InstructionSimplifier::Run() {
@@ -605,11 +611,23 @@
   return nullptr;
 }
 
+static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) {
+  if (input->GetType() == Primitive::kPrimBoolean) {
+    return true;  // The compared value is directly typed as boolean.
+  }
+  if (!cmp->GetUses().HasExactlyOneElement()) {
+    return false;  // Without a single user, the phi pattern below cannot apply.
+  }
+  // The comparison is also boolean when it and its input both feed the same phi node.
+  HInstruction* sole_user = cmp->GetUses().front().GetUser();
+  return sole_user->IsPhi() && sole_user->HasInput(input) && sole_user->HasInput(cmp);
+}
+
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input_const = equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
@@ -619,6 +637,7 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsFalse()) {
+        // Replace (bool_value == false) with !bool_value
         equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
         block->RemoveInstruction(equal);
         RecordSimplification();
@@ -640,11 +659,12 @@
   HInstruction* input_const = not_equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = not_equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, not_equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = not_equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsTrue()) {
+        // Replace (bool_value != true) with !bool_value
         not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
         block->RemoveInstruction(not_equal);
         RecordSimplification();
@@ -1842,6 +1862,66 @@
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
 }
 
+// Only valid for intrinsics whose single source of exceptions is a NPE raised when
+// the nth argument is null. Once that argument is provably non-null, the invoke can
+// no longer throw and its can-throw flag is cleared.
+void InstructionSimplifierVisitor::SimplifyNPEOnArgN(HInvoke* invoke, size_t n) {
+  HInstruction* nth_input = invoke->InputAt(n);
+  if (!invoke->CanThrow() || nth_input->CanBeNull()) {
+    return;  // Flag already cleared, or the argument may still be null.
+  }
+  invoke->SetCanThrow(false);
+}
+
+// For a method returning "this", uses of the returned value are redirected to the receiver.
+void InstructionSimplifierVisitor::SimplifyReturnThis(HInvoke* invoke) {
+  if (!invoke->HasUses()) {
+    return;  // No use of the result to redirect, so nothing is recorded.
+  }
+  invoke->ReplaceWith(invoke->InputAt(0));  // Input 0 is the receiver.
+  RecordSimplification();
+}
+
+// Escape-analysis predicate for a StringBuffer `reference`: true if `user` does not escape it.
+static bool NoEscapeForStringBufferReference(HInstruction* reference, HInstruction* user) {
+  if (user->IsInvokeStaticOrDirect()) {
+    // Any StringBuffer constructor is okay; an invoke without a resolved method is an escape.
+    ArtMethod* callee = user->AsInvokeStaticOrDirect()->GetResolvedMethod();
+    return callee != nullptr && callee->IsConstructor() && user->InputAt(0) == reference;
+  } else if (user->IsInvokeVirtual()) {
+    switch (user->AsInvokeVirtual()->GetIntrinsic()) {
+      case Intrinsics::kStringBufferLength:
+      case Intrinsics::kStringBufferToString:
+        DCHECK_EQ(user->InputAt(0), reference);
+        return true;
+      case Intrinsics::kStringBufferAppend:
+        // Returns "this", so only okay if no further uses.
+        DCHECK_EQ(user->InputAt(0), reference);
+        DCHECK_NE(user->InputAt(1), reference);
+        return !user->HasUses();
+      default:
+        break;
+    }
+  }
+  return false;
+}
+
+// Certain allocation intrinsics are kept by dead code elimination because they may
+// throw an OOM exception or have other side effects. When the result is unused, this
+// method removes them, safely ignoring the potential OOM, if circumstances allow.
+void InstructionSimplifierVisitor::SimplifyAllocationIntrinsic(HInvoke* invoke) {
+  if (invoke->HasUses()) {
+    return;
+  }
+  // Only thread-local synchronization may be removed: a synchronized receiver must not escape.
+  const bool is_synchronized = invoke->GetIntrinsic() == Intrinsics::kStringBufferToString;
+  HInstruction* receiver = invoke->InputAt(0);
+  if (is_synchronized && !DoesNotEscape(receiver, NoEscapeForStringBufferReference)) {
+    return;
+  }
+  invoke->GetBlock()->RemoveInstruction(invoke);
+  RecordSimplification();
+}
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1895,6 +1975,18 @@
     case Intrinsics::kStringLength:
       SimplifyStringIsEmptyOrLength(instruction);
       break;
+    case Intrinsics::kStringStringIndexOf:
+    case Intrinsics::kStringStringIndexOfAfter:
+      SimplifyNPEOnArgN(instruction, 1);  // 0th has own NullCheck
+      break;
+    case Intrinsics::kStringBufferAppend:
+    case Intrinsics::kStringBuilderAppend:
+      SimplifyReturnThis(instruction);
+      break;
+    case Intrinsics::kStringBufferToString:
+    case Intrinsics::kStringBuilderToString:
+      SimplifyAllocationIntrinsic(instruction);
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 782110c..9b54511 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -48,7 +48,7 @@
 class InstructionSimplifierArm : public HOptimization {
  public:
   InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+      : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
 
   static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index d0dd650..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -140,13 +140,6 @@
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-  // Don't move the array pointer if it is charAt because we need to take the count first.
-  // TODO: Implement reading (length + compression) for String compression feature from
-  // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary.
-  // Note that "LDR (Immediate)" does not have a "signed offset" encoding.
-  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
-    return;
-  }
   if (TryExtractArrayAccessAddress(instruction,
                                    instruction->GetArray(),
                                    instruction->GetIndex(),
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index f71684e..d4cb1f1 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -82,9 +82,10 @@
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
-  static constexpr const char* kInstructionSimplifierArm64PassName
-      = "instruction_simplifier_arm64";
+      : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+
+  static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64";
+
   void Run() OVERRIDE {
     InstructionSimplifierArm64Visitor visitor(graph_, stats_);
     visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 04e063c..c2b1374 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -231,15 +231,6 @@
                                   HInstruction* array,
                                   HInstruction* index,
                                   size_t data_offset) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HIntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 and ARM code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return false;
-  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
@@ -251,14 +242,20 @@
     // The access may require a runtime call or the original array pointer.
     return false;
   }
+  if (kEmitCompilerReadBarrier &&
+      access->IsArrayGet() &&
+      access->GetType() == Primitive::kPrimNot) {
+    // For object arrays, the read barrier instrumentation requires
+    // the original array pointer.
+    return false;
+  }
 
   // Proceed to extract the base address computation.
   HGraph* graph = access->GetBlock()->GetGraph();
   ArenaAllocator* arena = graph->GetArena();
 
   HIntConstant* offset = graph->GetIntConstant(data_offset);
-  HIntermediateAddress* address =
-      new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+  HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc);
   // TODO: Is it ok to not have this on the intermediate address?
   // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
   access->GetBlock()->InsertInstructionBefore(address, access);
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8790c1e..8f64fae 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1058,7 +1058,6 @@
   // Need temporary registers for String compression's feature.
   if (mirror::kUseStringCompression) {
     locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
@@ -1074,10 +1073,9 @@
   Register temp0 = locations->GetTemp(0).AsRegister<Register>();
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
-  Register temp3, temp4;
+  Register temp3;
   if (mirror::kUseStringCompression) {
     temp3 = locations->GetTemp(3).AsRegister<Register>();
-    temp4 = locations->GetTemp(4).AsRegister<Register>();
   }
 
   Label loop;
@@ -1104,41 +1102,42 @@
   // Reference equality check, return 0 if same reference.
   __ subs(out, str, ShifterOperand(arg));
   __ b(&end, EQ);
+
   if (mirror::kUseStringCompression) {
-    // Load lengths of this and argument strings.
+    // Load `count` fields of this and argument strings.
     __ ldr(temp3, Address(str, count_offset));
-    __ ldr(temp4, Address(arg, count_offset));
-    // Clean out compression flag from lengths.
-    __ bic(temp0, temp3, ShifterOperand(0x80000000));
-    __ bic(IP, temp4, ShifterOperand(0x80000000));
+    __ ldr(temp2, Address(arg, count_offset));
+    // Extract lengths from the `count` fields.
+    __ Lsr(temp0, temp3, 1u);
+    __ Lsr(temp1, temp2, 1u);
   } else {
     // Load lengths of this and argument strings.
     __ ldr(temp0, Address(str, count_offset));
-    __ ldr(IP, Address(arg, count_offset));
+    __ ldr(temp1, Address(arg, count_offset));
   }
   // out = length diff.
-  __ subs(out, temp0, ShifterOperand(IP));
+  __ subs(out, temp0, ShifterOperand(temp1));
   // temp0 = min(len(str), len(arg)).
   __ it(GT);
-  __ mov(temp0, ShifterOperand(IP), GT);
+  __ mov(temp0, ShifterOperand(temp1), GT);
   // Shorter string is empty?
   __ CompareAndBranchIfZero(temp0, &end);
 
   if (mirror::kUseStringCompression) {
     // Check if both strings using same compression style to use this comparison loop.
-    __ eors(temp3, temp3, ShifterOperand(temp4));
-    __ b(&different_compression, MI);
-  }
-  // Store offset of string value in preparation for comparison loop.
-  __ mov(temp1, ShifterOperand(value_offset));
-  if (mirror::kUseStringCompression) {
+    __ eor(temp2, temp2, ShifterOperand(temp3));
+    __ Lsrs(temp2, temp2, 1u);
+    __ b(&different_compression, CS);
     // For string compression, calculate the number of bytes to compare (not chars).
     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
-    __ cmp(temp4, ShifterOperand(0));
-    __ it(GE);
-    __ add(temp0, temp0, ShifterOperand(temp0), GE);
+    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
+    __ it(NE);
+    __ add(temp0, temp0, ShifterOperand(temp0), NE);
   }
 
+  // Store offset of string value in preparation for comparison loop.
+  __ mov(temp1, ShifterOperand(value_offset));
+
   // Assertions that must hold in order to compare multiple characters at a time.
   CHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment),
@@ -1198,69 +1197,80 @@
   // The comparison is unsigned for string compression, otherwise signed.
   __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
   __ b(&end, mirror::kUseStringCompression ? LS : LE);
+
   // Extract the characters and calculate the difference.
-  Label uncompressed_string, continue_process;
   if (mirror::kUseStringCompression) {
-    __ cmp(temp4, ShifterOperand(0));
-    __ b(&uncompressed_string, GE);
-    __ bic(temp1, temp1, ShifterOperand(0x7));
-    __ b(&continue_process);
+    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
+    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
+    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
+    __ orr(temp3, temp3, ShifterOperand(0xffu << 23));  // uncompressed ? 0xff800000u : 0x7f800000u
+    __ bic(temp1, temp1, ShifterOperand(temp3, LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
+    __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
+    __ Lsr(temp2, temp2, temp1);                        // Extract second character.
+    __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
+    __ Lsr(out, IP, temp1);                             // Extract first character.
+    __ and_(temp2, temp2, ShifterOperand(temp3));
+    __ and_(out, out, ShifterOperand(temp3));
+  } else {
+    __ bic(temp1, temp1, ShifterOperand(0xf));
+    __ Lsr(temp2, temp2, temp1);
+    __ Lsr(out, IP, temp1);
+    __ movt(temp2, 0);
+    __ movt(out, 0);
   }
-  __ Bind(&uncompressed_string);
-  __ bic(temp1, temp1, ShifterOperand(0xf));
-  __ Bind(&continue_process);
 
-  __ Lsr(temp2, temp2, temp1);
-  __ Lsr(IP, IP, temp1);
-  Label calculate_difference, uncompressed_string_extract_chars;
-  if (mirror::kUseStringCompression) {
-    __ cmp(temp4, ShifterOperand(0));
-    __ b(&uncompressed_string_extract_chars, GE);
-    __ ubfx(temp2, temp2, 0, 8);
-    __ ubfx(IP, IP, 0, 8);
-    __ b(&calculate_difference);
-  }
-  __ Bind(&uncompressed_string_extract_chars);
-  __ movt(temp2, 0);
-  __ movt(IP, 0);
-  __ Bind(&calculate_difference);
-  __ sub(out, IP, ShifterOperand(temp2));
-  __ b(&end);
+  __ sub(out, out, ShifterOperand(temp2));
 
   if (mirror::kUseStringCompression) {
+    __ b(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
-    Label loop_arg_compressed, loop_this_compressed, find_diff;
-    // Comparison for different compression style.
-    // This part is when THIS is compressed and ARG is not.
-    __ Bind(&different_compression);
-    __ add(temp2, str, ShifterOperand(value_offset));
-    __ add(temp3, arg, ShifterOperand(value_offset));
-    __ cmp(temp4, ShifterOperand(0));
-    __ b(&loop_arg_compressed, LT);
 
-    __ Bind(&loop_this_compressed);
-    __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex));
-    __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex));
-    __ cmp(IP, ShifterOperand(temp4));
-    __ b(&find_diff, NE);
-    __ subs(temp0, temp0, ShifterOperand(1));
-    __ b(&loop_this_compressed, GT);
-    __ b(&end);
+    // We want to free up the temp3, currently holding `str.count`, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we then
+    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
+    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
+    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+    __ add(temp0, temp0, ShifterOperand(temp0));  // Unlike LSL, this ADD is always 16-bit.
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    __ mov(temp1, ShifterOperand(str));
+    __ mov(temp2, ShifterOperand(arg));
+    __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
+    __ it(CS, kItThen);                       // Interleave with selection of temp1 and temp2.
+    __ mov(temp1, ShifterOperand(arg), CS);   // Preserves flags.
+    __ mov(temp2, ShifterOperand(str), CS);   // Preserves flags.
+    __ sbc(temp0, temp0, ShifterOperand(0));  // Complete the move of the compression flag.
 
-    // This part is when THIS is not compressed and ARG is.
-    __ Bind(&loop_arg_compressed);
-    __ ldrh(IP, Address(temp2, char_size, Address::PostIndex));
-    __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex));
-    __ cmp(IP, ShifterOperand(temp4));
-    __ b(&find_diff, NE);
-    __ subs(temp0, temp0, ShifterOperand(1));
-    __ b(&loop_arg_compressed, GT);
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ add(temp1, temp1, ShifterOperand(value_offset));
+    __ add(temp2, temp2, ShifterOperand(value_offset));
+
+    Label different_compression_loop;
+    Label different_compression_diff;
+
+    // Main loop for different compression.
+    __ Bind(&different_compression_loop);
+    __ ldrb(IP, Address(temp1, c_char_size, Address::PostIndex));
+    __ ldrh(temp3, Address(temp2, char_size, Address::PostIndex));
+    __ cmp(IP, ShifterOperand(temp3));
+    __ b(&different_compression_diff, NE);
+    __ subs(temp0, temp0, ShifterOperand(2));
+    __ b(&different_compression_loop, HI);
     __ b(&end);
 
     // Calculate the difference.
-    __ Bind(&find_diff);
-    __ sub(out, IP, ShifterOperand(temp4));
+    __ Bind(&different_compression_diff);
+    __ sub(out, IP, ShifterOperand(temp3));
+    // Flip the difference if the `arg` is compressed.
+    // `temp0` contains inverted `str` compression flag, i.e. the same as `arg` compression flag.
+    __ Lsrs(temp0, temp0, 1u);
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ it(CC);
+    __ rsb(out, out, ShifterOperand(0), CC);
   }
 
   __ Bind(&end);
@@ -1298,7 +1308,7 @@
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
 
-  Label loop, preloop;
+  Label loop;
   Label end;
   Label return_true;
   Label return_false;
@@ -1317,6 +1327,10 @@
     __ CompareAndBranchIfZero(arg, &return_false);
   }
 
+  // Reference equality check, return true if same reference.
+  __ cmp(str, ShifterOperand(arg));
+  __ b(&return_true, EQ);
+
   if (!optimizations.GetArgumentIsString()) {
     // Instanceof check for the argument by comparing class fields.
     // All string objects must have the same type since String cannot be subclassed.
@@ -1328,48 +1342,44 @@
     __ b(&return_false, NE);
   }
 
-  // Load lengths of this and argument strings.
+  // Load `count` fields of this and argument strings.
   __ ldr(temp, Address(str, count_offset));
   __ ldr(temp1, Address(arg, count_offset));
-  // Check if lengths are equal, return false if they're not.
+  // Check if `count` fields are equal, return false if they're not.
   // Also compares the compression style, if differs return false.
   __ cmp(temp, ShifterOperand(temp1));
   __ b(&return_false, NE);
-  // Return true if both strings are empty.
-  if (mirror::kUseStringCompression) {
-    // Length needs to be masked out first because 0 is treated as compressed.
-    __ bic(temp, temp, ShifterOperand(0x80000000));
-  }
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
   __ cbz(temp, &return_true);
-  // Reference equality check, return true if same reference.
-  __ cmp(str, ShifterOperand(arg));
-  __ b(&return_true, EQ);
 
-  // Assertions that must hold in order to compare strings 2 characters at a time.
+  // Assertions that must hold in order to compare strings 4 bytes at a time.
   DCHECK_ALIGNED(value_offset, 4);
   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
   if (mirror::kUseStringCompression) {
-    // If not compressed, directly to fast compare. Else do preprocess on length.
-    __ cmp(temp1, ShifterOperand(0));
-    __ b(&preloop, GT);
-    // Mask out compression flag and adjust length for compressed string (8-bit)
-    // as if it is a 16-bit data, new_length = (length + 1) / 2.
-    __ add(temp, temp, ShifterOperand(1));
-    __ Lsr(temp, temp, 1);
-    __ Bind(&preloop);
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+    __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
+    __ it(CS);                                      // If uncompressed,
+    __ add(temp, temp, ShifterOperand(temp), CS);   //   double the byte count.
   }
-  // Loop to compare strings 2 characters at a time starting at the front of the string.
-  // Ok to do this because strings with an odd length are zero-padded.
+
+  // Store offset of string value in preparation for comparison loop.
   __ LoadImmediate(temp1, value_offset);
+
+  // Loop to compare strings 4 bytes at a time starting at the front of the string.
+  // Ok to do this because strings are zero-padded to kObjectAlignment.
   __ Bind(&loop);
   __ ldr(out, Address(str, temp1));
   __ ldr(temp2, Address(arg, temp1));
+  __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
   __ cmp(out, ShifterOperand(temp2));
   __ b(&return_false, NE);
-  __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
-  __ subs(temp, temp, ShifterOperand(sizeof(uint32_t) /  sizeof(uint16_t)));
-  __ b(&loop, GT);
+  // With string compression, we have compared 4 bytes, otherwise 2 chars.
+  __ subs(temp, temp, ShifterOperand(mirror::kUseStringCompression ? 4 : 2));
+  __ b(&loop, HI);
 
   // Return true and exit the function.
   // If loop does not result in returning false, we return true.
@@ -1945,7 +1955,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       do {
@@ -1986,9 +1996,8 @@
     // Given the numeric representation, it's enough to check the low bit of the
     // rb_state. We do that by shifting the bit out of the lock word with LSRS
     // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
     // Carry flag is the last bit shifted out by LSRS.
     __ b(read_barrier_slow_path->GetEntryLabel(), CS);
@@ -2478,8 +2487,8 @@
     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
     // String's length.
     __ ldr(IP, Address(srcObj, count_offset));
-    __ cmp(IP, ShifterOperand(0));
-    __ b(&compressed_string_preloop, LT);
+    __ tst(IP, ShifterOperand(1));
+    __ b(&compressed_string_preloop, EQ);
   }
   __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
 
@@ -2514,9 +2523,10 @@
   __ subs(num_chr, num_chr, ShifterOperand(1));
   __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
   __ b(&remainder, GT);
-  __ b(&done);
 
   if (mirror::kUseStringCompression) {
+    __ b(&done);
+
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2601,6 +2611,15 @@
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongLowestOneBit)
 
+UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderToString);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index db1c022..d8a896e 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1243,7 +1243,6 @@
   // Need temporary registers for String compression's feature.
   if (mirror::kUseStringCompression) {
     locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
@@ -1261,10 +1260,9 @@
   Register temp0 = WRegisterFrom(locations->GetTemp(0));
   Register temp1 = WRegisterFrom(locations->GetTemp(1));
   Register temp2 = WRegisterFrom(locations->GetTemp(2));
-  Register temp3, temp5;
+  Register temp3;
   if (mirror::kUseStringCompression) {
     temp3 = WRegisterFrom(locations->GetTemp(3));
-    temp5 = WRegisterFrom(locations->GetTemp(4));
   }
 
   vixl::aarch64::Label loop;
@@ -1291,68 +1289,65 @@
   // Reference equality check, return 0 if same reference.
   __ Subs(out, str, arg);
   __ B(&end, eq);
+
   if (mirror::kUseStringCompression) {
-    // Load lengths of this and argument strings.
+    // Load `count` fields of this and argument strings.
     __ Ldr(temp3, HeapOperand(str, count_offset));
-    __ Ldr(temp5, HeapOperand(arg, count_offset));
+    __ Ldr(temp2, HeapOperand(arg, count_offset));
     // Clean out compression flag from lengths.
-    __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000)));
-    __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000)));
+    __ Lsr(temp0, temp3, 1u);
+    __ Lsr(temp1, temp2, 1u);
   } else {
     // Load lengths of this and argument strings.
     __ Ldr(temp0, HeapOperand(str, count_offset));
     __ Ldr(temp1, HeapOperand(arg, count_offset));
   }
-  // Return zero if both strings are empty.
-  __ Orr(out, temp0, temp1);
-  __ Cbz(out, &end);
   // out = length diff.
   __ Subs(out, temp0, temp1);
-  // temp2 = min(len(str), len(arg)).
-  __ Csel(temp2, temp1, temp0, ge);
+  // temp0 = min(len(str), len(arg)).
+  __ Csel(temp0, temp1, temp0, ge);
   // Shorter string is empty?
-  __ Cbz(temp2, &end);
+  __ Cbz(temp0, &end);
 
   if (mirror::kUseStringCompression) {
     // Check if both strings using same compression style to use this comparison loop.
-    __ Eor(temp3.W(), temp3, Operand(temp5));
-    __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression);
+    __ Eor(temp2, temp2, Operand(temp3));
+    // Interleave with compression flag extraction which is needed for both paths
+    // and also set flags which is needed only for the different compressions path.
+    __ Ands(temp3.W(), temp3.W(), Operand(1));
+    __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
   }
   // Store offset of string value in preparation for comparison loop.
   __ Mov(temp1, value_offset);
   if (mirror::kUseStringCompression) {
     // For string compression, calculate the number of bytes to compare (not chars).
-    // This could be in theory exceed INT32_MAX, so treat temp2 as unsigned.
-    vixl::aarch64::Label let_it_signed;
-    __ Cmp(temp5, Operand(0));
-    __ B(lt, &let_it_signed);
-    __ Add(temp2, temp2, Operand(temp2));
-    __ Bind(&let_it_signed);
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ Lsl(temp0, temp0, temp3);
   }
 
   UseScratchRegisterScope scratch_scope(masm);
   Register temp4 = scratch_scope.AcquireX();
 
-  // Assertions that must hold in order to compare strings 4 characters at a time.
+  // Assertions that must hold in order to compare strings 8 bytes at a time.
   DCHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
 
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   DCHECK_EQ(char_size, 2u);
 
-  // Promote temp0 to an X reg, ready for LDR.
-  temp0 = temp0.X();
+  // Promote temp2 to an X reg, ready for LDR.
+  temp2 = temp2.X();
 
   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
   __ Bind(&loop);
   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
-  __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
-  __ Cmp(temp4, temp0);
+  __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
+  __ Cmp(temp4, temp2);
   __ B(ne, &find_char_diff);
   __ Add(temp1, temp1, char_size * 4);
   // With string compression, we have compared 8 bytes, otherwise 4 chars.
-  __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4);
-  __ B(hi, &loop);
+  __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
+  __ B(&loop, hi);
   __ B(&end);
 
   // Promote temp1 to an X reg, ready for EOR.
@@ -1361,78 +1356,85 @@
   // Find the single character difference.
   __ Bind(&find_char_diff);
   // Get the bit position of the first character that differs.
-  __ Eor(temp1, temp0, temp4);
+  __ Eor(temp1, temp2, temp4);
   __ Rbit(temp1, temp1);
   __ Clz(temp1, temp1);
+
   // If the number of chars remaining <= the index where the difference occurs (0-3), then
   // the difference occurs outside the remaining string data, so just return length diff (out).
   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
   // unsigned when string compression is disabled.
   // When it's enabled, the comparison must be unsigned.
-  __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
+  __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
   __ B(ls, &end);
+
   // Extract the characters and calculate the difference.
-  vixl::aarch64::Label uncompressed_string, continue_process;
   if (mirror:: kUseStringCompression) {
-    __ Tbz(temp5, kWRegSize - 1, &uncompressed_string);
     __ Bic(temp1, temp1, 0x7);
-    __ B(&continue_process);
+    __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
+  } else {
+    __ Bic(temp1, temp1, 0xf);
   }
-  __ Bind(&uncompressed_string);
-  __ Bic(temp1, temp1, 0xf);
-  __ Bind(&continue_process);
-
-  __ Lsr(temp0, temp0, temp1);
+  __ Lsr(temp2, temp2, temp1);
   __ Lsr(temp4, temp4, temp1);
-  vixl::aarch64::Label uncompressed_string_extract_chars;
   if (mirror::kUseStringCompression) {
-    __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars);
-    __ And(temp4, temp4, 0xff);
-    __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB));
-    __ B(&end);
+    // Prioritize the case of compressed strings and calculate such result first.
+    __ Uxtb(temp1, temp4);
+    __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
+    __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
   }
-  __ Bind(&uncompressed_string_extract_chars);
-  __ And(temp4, temp4, 0xffff);
-  __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
-  __ B(&end);
+  __ Uxth(temp4, temp4);
+  __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
 
   if (mirror::kUseStringCompression) {
-    vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff;
+    __ B(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
-    temp0 = temp0.W();
     temp1 = temp1.W();
-    // Comparison for different compression style.
-    // This part is when THIS is compressed and ARG is not.
-    __ Bind(&different_compression);
-    __ Add(temp0, str, Operand(value_offset));
-    __ Add(temp1, arg, Operand(value_offset));
-    __ Cmp(temp5, Operand(0));
-    __ B(lt, &loop_arg_compressed);
+    temp2 = temp2.W();
+    temp4 = temp4.W();
 
-    __ Bind(&loop_this_compressed);
-    __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex));
-    __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex));
-    __ Cmp(temp3, Operand(temp5));
-    __ B(ne, &find_diff);
-    __ Subs(temp2, temp2, 1);
-    __ B(gt, &loop_this_compressed);
-    __ B(&end);
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    // Note that flags have been set by the `str` compression flag extraction to `temp3`
+    // before branching to the `different_compression` label.
+    __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
+    __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
 
-    // This part is when THIS is not compressed and ARG is.
-    __ Bind(&loop_arg_compressed);
-    __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex));
-    __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex));
-    __ Cmp(temp3, Operand(temp5));
-    __ B(ne, &find_diff);
-    __ Subs(temp2, temp2, 1);
-    __ B(gt, &loop_arg_compressed);
+    // We want to free up the temp3, currently holding `str` compression flag, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
+    // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
+    // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+    __ Lsl(temp0, temp0, 1u);
+
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ Add(temp1, temp1, Operand(value_offset));
+    __ Add(temp2, temp2, Operand(value_offset));
+
+    // Complete the move of the compression flag.
+    __ Sub(temp0, temp0, Operand(temp3));
+
+    vixl::aarch64::Label different_compression_loop;
+    vixl::aarch64::Label different_compression_diff;
+
+    __ Bind(&different_compression_loop);
+    __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
+    __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
+    __ Subs(temp4, temp4, Operand(temp3));
+    __ B(&different_compression_diff, ne);
+    __ Subs(temp0, temp0, 2);
+    __ B(&different_compression_loop, hi);
     __ B(&end);
 
     // Calculate the difference.
-    __ Bind(&find_diff);
-    __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH));
+    __ Bind(&different_compression_diff);
+    __ Tst(temp0, Operand(1));
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ Cneg(out, temp4, ne);
   }
 
   __ Bind(&end);
@@ -1468,7 +1470,7 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
 
-  vixl::aarch64::Label loop, preloop;
+  vixl::aarch64::Label loop;
   vixl::aarch64::Label end;
   vixl::aarch64::Label return_true;
   vixl::aarch64::Label return_false;
@@ -1502,49 +1504,46 @@
     __ B(&return_false, ne);
   }
 
-  // Load lengths of this and argument strings.
+  // Load `count` fields of this and argument strings.
   __ Ldr(temp, MemOperand(str.X(), count_offset));
   __ Ldr(temp1, MemOperand(arg.X(), count_offset));
-  // Check if lengths are equal, return false if they're not.
+  // Check if `count` fields are equal, return false if they're not.
   // Also compares the compression style, if differs return false.
   __ Cmp(temp, temp1);
   __ B(&return_false, ne);
-  // Return true if both strings are empty.
-  if (mirror::kUseStringCompression) {
-    // Length needs to be masked out first because 0 is treated as compressed.
-    __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000)));
-  }
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
   __ Cbz(temp, &return_true);
 
-  // Assertions that must hold in order to compare strings 4 characters at a time.
+  // Assertions that must hold in order to compare strings 8 bytes at a time.
   DCHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
 
   if (mirror::kUseStringCompression) {
-    // If not compressed, directly to fast compare. Else do preprocess on length.
-    __ Cmp(temp1, Operand(0));
-    __ B(&preloop, gt);
-    // Mask out compression flag and adjust length for compressed string (8-bit)
-    // as if it is a 16-bit data, new_length = (length + 1) / 2
-    __ Add(temp, temp, 1);
-    __ Lsr(temp, temp, 1);
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+    __ Lsr(temp, temp, 1u);             // Extract length.
+    __ And(temp1, temp1, Operand(1));   // Extract compression flag.
+    __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
   }
 
+  // Store offset of string value in preparation for comparison loop
+  __ Mov(temp1, value_offset);
+
   temp1 = temp1.X();
   temp2 = temp2.X();
-  // Loop to compare strings 4 characters at a time starting at the beginning of the string.
-  // Ok to do this because strings are zero-padded to be 8-byte aligned.
-  // Store offset of string value in preparation for comparison loop
-  __ Bind(&preloop);
-  __ Mov(temp1, value_offset);
+  // Loop to compare strings 8 bytes at a time starting at the front of the string.
+  // Ok to do this because strings are zero-padded to kObjectAlignment.
   __ Bind(&loop);
   __ Ldr(out, MemOperand(str.X(), temp1));
   __ Ldr(temp2, MemOperand(arg.X(), temp1));
   __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
   __ Cmp(out, temp2);
   __ B(&return_false, ne);
-  __ Sub(temp, temp, Operand(4), SetFlags);
-  __ B(&loop, gt);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
+  __ B(&loop, hi);
 
   // Return true and exit the function.
   // If loop does not result in returning false, we return true.
@@ -1900,10 +1899,6 @@
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
-  // Need temporary register for String compression feature.
-  if (mirror::kUseStringCompression) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1931,10 +1926,6 @@
   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
   Register num_chr = XRegisterFrom(locations->GetTemp(1));
   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
-  Register tmp3;
-  if (mirror::kUseStringCompression) {
-    tmp3 = WRegisterFrom(locations->GetTemp(3));
-  }
 
   UseScratchRegisterScope temps(masm);
   Register dst_ptr = temps.AcquireX();
@@ -1957,8 +1948,8 @@
     // Location of count in string.
     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
     // String's length.
-    __ Ldr(tmp3, MemOperand(srcObj, count_offset));
-    __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop);
+    __ Ldr(tmp2, MemOperand(srcObj, count_offset));
+    __ Tbz(tmp2, 0, &compressed_string_preloop);
   }
   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
 
@@ -2174,11 +2165,11 @@
   __ Cbz(dst, slow_path->GetEntryLabel());
 
   if (!length.IsConstant()) {
-    // If the length is negative, bail out.
-    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
-    // If the length > 32 then (currently) prefer libcore's native implementation.
+    // Merge the following two comparisons into one:
+    //   If the length is negative, bail out (delegate to libcore's native implementation).
+    //   If the length > 32 then (currently) prefer libcore's native implementation.
     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
-    __ B(slow_path->GetEntryLabel(), gt);
+    __ B(slow_path->GetEntryLabel(), hi);
   } else {
     // We have already checked in the LocationsBuilder for the constant case.
     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
@@ -2388,11 +2379,11 @@
   if (!length.IsConstant() &&
       !optimizations.GetCountIsSourceLength() &&
       !optimizations.GetCountIsDestinationLength()) {
-    // If the length is negative, bail out.
-    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
-    // If the length >= 128 then (currently) prefer native implementation.
+    // Merge the following two comparisons into one:
+    //   If the length is negative, bail out (delegate to libcore's native implementation).
+    //   If the length >= 128 then (currently) prefer native implementation.
     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
-    __ B(intrinsic_slow_path->GetEntryLabel(), ge);
+    __ B(intrinsic_slow_path->GetEntryLabel(), hs);
   }
   // Validity checks: source.
   CheckSystemArrayCopyPosition(masm,
@@ -2659,7 +2650,7 @@
       //   if (src_ptr != end_ptr) {
       //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-      //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+      //     bool is_gray = (rb_state == ReadBarrier::GrayState());
       //     if (is_gray) {
       //       // Slow-path copy.
       //       do {
@@ -2704,9 +2695,8 @@
       codegen_->AddSlowPath(read_barrier_slow_path);
 
       // Given the numeric representation, it's enough to check the low bit of the rb_state.
-      static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-      static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-      static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
       __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
 
       // Fast-path copy.
@@ -2789,6 +2779,15 @@
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
 
+UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
new file mode 100644
index 0000000..9e72447
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -0,0 +1,2728 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_arm_vixl.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm_vixl.h"
+#include "common_arm.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+
+#include "aarch32/constants-aarch32.h"
+
+namespace art {
+namespace arm {
+
+#define __ assembler->GetVIXLAssembler()->
+
+using helpers::DRegisterFrom;
+using helpers::HighRegisterFrom;
+using helpers::InputDRegisterAt;
+using helpers::InputRegisterAt;
+using helpers::InputSRegisterAt;
+using helpers::InputVRegisterAt;
+using helpers::Int32ConstantFrom;
+using helpers::LocationFrom;
+using helpers::LowRegisterFrom;
+using helpers::LowSRegisterFrom;
+using helpers::OutputDRegister;
+using helpers::OutputRegister;
+using helpers::OutputVRegister;
+using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
+
+using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+
+ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
+  return codegen_->GetAssembler();
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
+// intrinsified call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
+//       restored!
+//
+// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
+//       sub-optimal (compared to a direct pointer call), but this is a slow-path.
+
+class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
+      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
+
+  Location MoveArguments(CodeGenerator* codegen) {
+    InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
+    return calling_convention_visitor.GetMethodLocation();
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    Location method_loc = MoveArguments(codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
+    } else {
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
+    }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      codegen->MoveFromReturnRegister(out, invoke_->GetType());
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
+};
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
+      : SlowPathCodeARMVIXL(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    vixl32::Register dest = InputRegisterAt(instruction_, 2);
+    Location dest_pos = locations->InAt(3);
+    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
+    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
+    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
+
+    __ Bind(GetEntryLabel());
+    // Compute the base destination address in `dst_curr_addr`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(dst_curr_addr, dest, element_size * constant + offset);
+    } else {
+      __ Add(dst_curr_addr,
+             dest,
+             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(dst_curr_addr, dst_curr_addr, offset);
+    }
+
+    vixl32::Label loop;
+    __ Bind(&loop);
+    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+    assembler->MaybeUnpoisonHeapReference(tmp);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp = ReadBarrier::Mark(tmp);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+    // explanations.)
+    DCHECK(!tmp.IsSP());
+    DCHECK(!tmp.IsLR());
+    DCHECK(!tmp.IsPC());
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK(!src_curr_addr.Is(ip));
+    DCHECK(!dst_curr_addr.Is(ip));
+    DCHECK(!src_stop_addr.Is(ip));
+    DCHECK(!tmp.Is(ip));
+    DCHECK(tmp.IsRegister()) << tmp;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    assembler->MaybePoisonHeapReference(tmp);
+    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(ne, &loop);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
+};
+
+IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
+    : arena_(codegen->GetGraph()->GetArena()),
+      assembler_(codegen->GetAssembler()),
+      features_(codegen->GetInstructionSetFeatures()) {}
+
+bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
+}
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  if (is64bit) {
+    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
+  } else {
+    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
+  }
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  if (is64bit) {
+    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
+  } else {
+    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenNumberOfLeadingZeros(LocationSummary* locations,
+                                    Primitive::Type type,
+                                    ArmVIXLAssembler* assembler) {
+  Location in = locations->InAt(0);
+  vixl32::Register out = RegisterFrom(locations->Out());
+
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Label end;
+    __ Clz(out, in_reg_hi);  // Final result when the high word is non-zero.
+    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
+    __ Clz(out, in_reg_lo);  // High word is zero: count within the low word...
+    __ Add(out, out, 32);  // ...and add the 32 zero bits of the high word.
+    __ Bind(&end);
+  } else {
+    __ Clz(out, RegisterFrom(in));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenNumberOfTrailingZeros(LocationSummary* locations,
+                                     Primitive::Type type,
+                                     ArmVIXLAssembler* assembler) {
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  vixl32::Register out = RegisterFrom(locations->Out());
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
+    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
+    vixl32::Label end;
+    __ Rbit(out, in_reg_lo);  // Bit-reverse so that trailing zeros become leading zeros.
+    __ Clz(out, out);  // Final result when the low word is non-zero.
+    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
+    __ Rbit(out, in_reg_hi);  // Low word is zero: count within the high word...
+    __ Clz(out, out);
+    __ Add(out, out, 32);  // ...and add the 32 zero bits of the low word.
+    __ Bind(&end);
+  } else {
+    vixl32::Register in = RegisterFrom(locations->InAt(0));
+    __ Rbit(out, in);  // Trailing zeros of `in` == leading zeros of its bit-reversal.
+    __ Clz(out, out);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
+  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations,
+                          bool is64bit,
+                          ArmVIXLAssembler* assembler) {
+  Location in = locations->InAt(0);
+  Location output = locations->Out();
+
+  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
+
+  if (is64bit) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Register out_reg_lo = LowRegisterFrom(output);
+    vixl32::Register out_reg_hi = HighRegisterFrom(output);
+
+    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
+
+    __ Asr(mask, in_reg_hi, 31);  // mask = sign replicated: 0 if in >= 0, -1 if in < 0.
+    __ Adds(out_reg_lo, in_reg_lo, mask);  // Low word of `in + mask`; sets the carry...
+    __ Adc(out_reg_hi, in_reg_hi, mask);  // ...which the high word addition consumes.
+    __ Eor(out_reg_lo, mask, out_reg_lo);  // (in + mask) ^ mask is abs(in) in two's complement.
+    __ Eor(out_reg_hi, mask, out_reg_hi);
+  } else {
+    vixl32::Register in_reg = RegisterFrom(in);
+    vixl32::Register out_reg = RegisterFrom(output);
+
+    __ Asr(mask, in_reg, 31);  // mask = sign replicated: 0 if in >= 0, -1 if in < 0.
+    __ Add(out_reg, in_reg, mask);
+    __ Eor(out_reg, mask, out_reg);  // (in + mask) ^ mask is abs(in) in two's complement.
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+  vixl32::Register op1 = InputRegisterAt(invoke, 0);
+  vixl32::Register op2 = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  __ Cmp(op1, op2);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               3 * kMaxInstructionSizeInBytes,  // ite + two movs.
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ ite(is_min ? lt : gt);  // IT block: one "then" and one "else" conditional move.
+    __ mov(is_min ? lt : gt, out, op1);  // op1 is strictly smaller (min) / larger (max).
+    __ mov(is_min ? ge : le, out, op2);  // Otherwise, including op1 == op2.
+  }
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke, /* is_min */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke, /* is_min */ false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
+  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+  // exception. So we can't use ldrd as addr may be unaligned.
+  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
+  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
+  if (addr.Is(lo)) {  // `lo` aliases `addr`: load `hi` first so the address is still intact.
+    __ Ldr(hi, MemOperand(addr, 4));
+    __ Ldr(lo, MemOperand(addr));
+  } else {
+    __ Ldr(lo, MemOperand(addr));
+    __ Ldr(hi, MemOperand(addr, 4));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
+  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+  // exception. So we can't use strd as addr may be unaligned.
+  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
+  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Ldr(OutputRegister(invoke),
+         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorARMVIXL* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  Location base_loc = locations->InAt(1);
+  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      vixl32::Register trg = RegisterFrom(trg_loc);
+      __ Ldr(trg, MemOperand(base, offset));
+      if (is_volatile) {
+        __ Dmb(vixl32::ISH);
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      vixl32::Register trg = RegisterFrom(trg_loc);
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Dmb(vixl32::ISH);
+          }
+        } else {
+          __ Ldr(trg, MemOperand(base, offset));
+          if (is_volatile) {
+            __ Dmb(vixl32::ISH);
+          }
+          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ Ldr(trg, MemOperand(base, offset));
+        if (is_volatile) {
+          __ Dmb(vixl32::ISH);
+        }
+        assembler->MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
+      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
+      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Add(temp_reg, base, offset);
+        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
+      } else {
+        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
+      }
+      if (is_volatile) {
+        __ Dmb(vixl32::ISH);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
+  }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
+                                                           kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
+}
+
+// Builds the LocationSummary (kNoCall, intrinsified) for an invoke with four inputs and no
+// output. Input 0 is the unused receiver; inputs 1-3 each require a register.
+//
+// Two register temps are reserved in either of these cases:
+//  - `type` is kPrimLong, the access is volatile and the CPU lacks atomic LDRD/STRD
+//    (temps for the ldrexd-strexd loop);
+//  - `type` is kPrimNot (temps for card marking).
+static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
+                                     const ArmInstructionSetFeatures& features,
+                                     Primitive::Type type,
+                                     bool is_volatile,
+                                     HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+
+  // Unused receiver.
+  summary->SetInAt(0, Location::NoLocation());
+  for (uint32_t input = 1; input <= 3; ++input) {
+    summary->SetInAt(input, Location::RequiresRegister());
+  }
+
+  // The feature query is only reached for volatile long stores, as in a nested if.
+  const bool needs_exclusive_loop_temps =
+      (type == Primitive::kPrimLong) && is_volatile && !features.HasAtomicLdrdAndStrd();
+  const bool needs_card_marking_temps = (type == Primitive::kPrimNot);
+
+  if (needs_exclusive_loop_temps || needs_card_marking_temps) {
+    // Long case: Temp_lo / Temp_hi. Reference case: Temp / Card.
+    summary->AddTemp(Location::RequiresRegister());
+    summary->AddTemp(Location::RequiresRegister());
+  }
+}
+
+// Location builders for the UnsafePut intrinsics. Each forwards to CreateIntIntIntIntToVoid with
+// the stored type; only the *Volatile variants pass is_volatile = true (the ordered variants use
+// the same locations as the plain ones).
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
+  constexpr bool kIsVolatile = true;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  constexpr bool kIsVolatile = true;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, kIsVolatile, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  constexpr bool kIsVolatile = true;
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, kIsVolatile, invoke);
+}
+
+// Emits the store sequence shared by the UnsafePut* intrinsics.
+//
+// Operands come from `locations`: input 1 is the base object, input 2 the offset (only its low
+// register is used) and input 3 the value to store (a register pair for kPrimLong).
+//
+// Barriers: a DMB ISH is emitted before the store when `is_volatile` or `is_ordered` is set,
+// and a second one after the store when `is_volatile` is set.
+//
+// For kPrimLong, a volatile store on a CPU without atomic LDRD/STRD uses an LDREXD/STREXD
+// retry loop; otherwise a plain STRD is emitted. For kPrimNot, the reference is poisoned via a
+// temp when kPoisonHeapReferences is enabled, and the GC card is marked after the store.
+static void GenUnsafePut(LocationSummary* locations,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         bool is_ordered,
+                         CodeGeneratorARMVIXL* codegen) {
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+
+  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
+  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
+  vixl32::Register value;
+
+  // Barrier ahead of the store for both volatile and ordered puts.
+  if (is_volatile || is_ordered) {
+    __ Dmb(vixl32::ISH);
+  }
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
+    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
+    value = value_lo;
+    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+      // These temps are the ones reserved by CreateIntIntIntIntToVoid under the same condition.
+      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
+      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      // temp_reg = address of the field.
+      __ Add(temp_reg, base, offset);
+      vixl32::Label loop_head;
+      __ Bind(&loop_head);
+      // The loaded value is discarded; temp_lo is then reused for the STREXD status result.
+      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
+      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
+      // Retry while the status is non-zero.
+      __ Cmp(temp_lo, 0);
+      __ B(ne, &loop_head);
+    } else {
+      __ Strd(value_lo, value_hi, MemOperand(base, offset));
+    }
+  } else {
+    value = RegisterFrom(locations->InAt(3));
+    vixl32::Register source = value;
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      // Poison a copy so that `value` stays unpoisoned for the card marking below.
+      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+      __ Mov(temp, value);
+      assembler->PoisonHeapReference(temp);
+      source = temp;
+    }
+    __ Str(source, MemOperand(base, offset));
+  }
+
+  // Trailing barrier for volatile puts only.
+  if (is_volatile) {
+    __ Dmb(vixl32::ISH);
+  }
+
+  if (type == Primitive::kPrimNot) {
+    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
+  }
+}
+
+// Code generators for the UnsafePut intrinsics. Each forwards to GenUnsafePut with the stored
+// type and the (is_volatile, is_ordered) pair matching its name; at most one flag is set.
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  constexpr bool kIsOrdered = false;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimInt, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  constexpr bool kIsOrdered = true;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimInt, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
+  constexpr bool kIsVolatile = true;
+  constexpr bool kIsOrdered = false;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimInt, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  constexpr bool kIsOrdered = false;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimNot, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  constexpr bool kIsOrdered = true;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimNot, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  constexpr bool kIsVolatile = true;
+  constexpr bool kIsOrdered = false;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimNot, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  constexpr bool kIsOrdered = false;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimLong, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  constexpr bool kIsVolatile = false;
+  constexpr bool kIsOrdered = true;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimLong, kIsVolatile, kIsOrdered, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  constexpr bool kIsVolatile = true;
+  constexpr bool kIsOrdered = false;
+  LocationSummary* locations = invoke->GetLocations();
+  GenUnsafePut(locations, Primitive::kPrimLong, kIsVolatile, kIsOrdered, codegen_);
+}
+
+// Builds the LocationSummary for the UnsafeCAS intrinsics: five inputs (the first is the unused
+// receiver, the other four require registers), a register output and two register temps.
+//
+// The summary is kCallOnSlowPath only for UnsafeCASObject with Baker read barriers enabled;
+// otherwise it is kNoCall. The output is marked as overlapping the inputs when the intrinsic
+// may call, or when heap poisoning is enabled and `type` is a reference.
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
+                                                HInvoke* invoke,
+                                                Primitive::Type type) {
+  const bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
+  const auto call_kind = can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+  LocationSummary* summary = new (arena) LocationSummary(invoke, call_kind, kIntrinsified);
+
+  // Unused receiver.
+  summary->SetInAt(0, Location::NoLocation());
+  for (uint32_t input = 1; input <= 4; ++input) {
+    summary->SetInAt(input, Location::RequiresRegister());
+  }
+
+  // With heap poisoning, the unpoisoning operations must not clobber the output. The same
+  // holds when a (Baker) read barrier, which may call, is emitted.
+  const bool poisons_reference = kPoisonHeapReferences && type == Primitive::kPrimNot;
+  const bool output_must_overlap = poisons_reference || can_call;
+  summary->SetOut(Location::RequiresRegister(),
+                  output_must_overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+
+  // Temps for the CAS itself. For UnsafeCASObject they double as card-marking temps and
+  // possibly as (Baker) read barrier temps.
+  summary->AddTemp(Location::RequiresRegister());  // Pointer.
+  summary->AddTemp(Location::RequiresRegister());  // Temp 1.
+}
+
+// Emits the compare-and-swap sequence for the UnsafeCASInt / UnsafeCASObject intrinsics as an
+// LDREX/STREX retry loop bracketed by DMB ISH barriers.
+//
+// Operands come from the invoke's locations: input 1 is the base object, input 2 the offset
+// (low register only), input 3 the expected value and input 4 the new value. Temp 0 holds the
+// computed field address and temp 1 the loaded value / store status. The output register
+// receives 1 if the swap was performed and 0 otherwise.
+//
+// `type` must not be kPrimLong. For kPrimNot the GC card is marked up front, a Baker read
+// barrier is emitted when enabled, and `expected` / `value` are poisoned around the loop when
+// kPoisonHeapReferences is enabled.
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
+  DCHECK_NE(type, Primitive::kPrimLong);
+
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
+
+  vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
+  vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
+  vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
+
+  Location tmp_ptr_loc = locations->GetTemp(0);
+  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
+  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
+
+  if (type == Primitive::kPrimNot) {
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
+    // Both temps are used as scratch here; they are (re)initialized before the CAS loop.
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          tmp_ptr_loc,
+          /* needs_null_check */ false,
+          /* always_update_field */ true,
+          &tmp);
+    }
+  }
+
+  // Prevent reordering with prior memory operations.
+  // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
+  // latter allows a preceding load to be delayed past the STREX
+  // instruction below.
+  __ Dmb(vixl32::ISH);
+
+  // tmp_ptr = address of the field.
+  __ Add(tmp_ptr, base, offset);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->PoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not poison `value`, as it is the same register as
+      // `expected`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value);
+    }
+  }
+
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  // result = (tmp == 0);
+
+  vixl32::Label loop_head;
+  __ Bind(&loop_head);
+
+  __ Ldrex(tmp, MemOperand(tmp_ptr));
+
+  // Sets the Z flag when the loaded value matches `expected`.
+  __ Subs(tmp, tmp, expected);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               3 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    // Only on a match: attempt the store (status goes to tmp) and compare the status with 1
+    // so that EQ afterwards means "store failed, retry".
+    __ itt(eq);
+    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
+    __ cmp(eq, tmp, 1);
+  }
+
+  __ B(eq, &loop_head);
+
+  __ Dmb(vixl32::ISH);
+
+  // out = 1 - tmp. The conditional move below zeroes `out` when the subtraction borrowed
+  // (tmp > 1), so `out` ends up as 1 only for tmp == 0 and as 0 otherwise.
+  __ Rsbs(out, tmp, 1);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(cc);
+    __ mov(cc, out, 0);
+  }
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    // Restore the input registers that were poisoned before the loop.
+    codegen->GetAssembler()->UnpoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not unpoison `value`, as it is the same register as
+      // `expected`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value);
+    }
+  }
+}
+
+// Location builders and code generators for the UnsafeCAS intrinsics.
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
+  const Primitive::Type type = Primitive::kPrimInt;
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, type);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
+  // Baker-style read barriers are the only read barrier implementation for which the
+  // UnsafeCASObject intrinsic is supported; with any other one, no locations are created.
+  const bool read_barrier_supported = !kEmitCompilerReadBarrier || kUseBakerReadBarrier;
+  if (read_barrier_supported) {
+    CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
+  }
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
+  const Primitive::Type type = Primitive::kPrimInt;
+  GenCas(invoke, type, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
+  // Code generation must only be reached in a configuration the locations builder accepted:
+  // no read barriers at all, or Baker-style ones.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  const Primitive::Type type = Primitive::kPrimNot;
+  GenCas(invoke, type, codegen_);
+}
+
+// Sets up locations for the String.compareTo intrinsic: both inputs in registers, three
+// register temps (four with string compression) and an output that overlaps the inputs.
+void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
+  // The summary is kCallOnSlowPath only when the argument (input 1) can be null.
+  const auto call_kind = invoke->InputAt(1)->CanBeNull()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* summary = new (arena_) LocationSummary(invoke, call_kind, kIntrinsified);
+
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetInAt(1, Location::RequiresRegister());
+
+  // Three temps are always reserved; the string compression feature needs one more.
+  const uint32_t temp_count = mirror::kUseStringCompression ? 4u : 3u;
+  for (uint32_t i = 0; i < temp_count; ++i) {
+    summary->AddTemp(Location::RequiresRegister());
+  }
+
+  summary->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+// Emits inline code for the String.compareTo intrinsic.
+//
+// Input 0 is the receiver string and input 1 the argument. When the argument can be null, a
+// null argument branches to an intrinsic slow path. Otherwise the generated code:
+//  - returns 0 when both references are identical;
+//  - compares the string data one 32-bit word at a time in a loop unrolled twice;
+//  - on a mismatch inside the shorter string, returns the difference of the first differing
+//    characters; otherwise returns the difference of the lengths.
+// With mirror::kUseStringCompression, strings with different compression styles are compared
+// in a separate byte-versus-halfword loop.
+void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register str = InputRegisterAt(invoke, 0);
+  vixl32::Register arg = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
+  // The fourth temp only exists when string compression is enabled.
+  vixl32::Register temp3;
+  if (mirror::kUseStringCompression) {
+    temp3 = RegisterFrom(locations->GetTemp(3));
+  }
+
+  vixl32::Label loop;
+  vixl32::Label find_char_diff;
+  vixl32::Label end;
+  vixl32::Label different_compression;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Take slow path and throw if input can be and is null.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
+  }
+
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(eq, &end);
+
+  if (mirror::kUseStringCompression) {
+    // Load `count` fields of this and argument strings.
+    __ Ldr(temp3, MemOperand(str, count_offset));
+    __ Ldr(temp2, MemOperand(arg, count_offset));
+    // Extract lengths from the `count` fields.
+    __ Lsr(temp0, temp3, 1u);
+    __ Lsr(temp1, temp2, 1u);
+  } else {
+    // Load lengths of this and argument strings.
+    __ Ldr(temp0, MemOperand(str, count_offset));
+    __ Ldr(temp1, MemOperand(arg, count_offset));
+  }
+  // out = length diff.
+  __ Subs(out, temp0, temp1);
+  // temp0 = min(len(str), len(arg)).
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    // If len(str) > len(arg) (flags from the SUBS above), take the argument's length.
+    __ it(gt);
+    __ mov(gt, temp0, temp1);
+  }
+
+  // Shorter string is empty?
+  // Note that mirror::kUseStringCompression==true introduces lots of instructions,
+  // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
+  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
+
+  if (mirror::kUseStringCompression) {
+    // Check if both strings use the same compression style to use this comparison loop.
+    __ Eors(temp2, temp2, temp3);
+    __ Lsrs(temp2, temp2, 1u);
+    __ B(cs, &different_compression);
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
+
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    // Flag set (uncompressed): two bytes per character, so double the count.
+    __ it(ne);
+    __ add(ne, temp0, temp0, temp0);
+  }
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+
+  vixl32::Label find_char_diff_2nd_cmp;
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  vixl32::Register temp_reg = temps.Acquire();
+  __ Ldr(temp_reg, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(temp_reg, temp2);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 2);
+
+  __ Ldr(temp_reg, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(temp_reg, temp2);
+  __ B(ne, &find_char_diff_2nd_cmp);
+  __ Add(temp1, temp1, char_size * 2);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
+  __ B(hi, &loop);
+  __ B(&end);
+
+  __ Bind(&find_char_diff_2nd_cmp);
+  if (mirror::kUseStringCompression) {
+    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
+    __ B(ls, &end);  // Was the second comparison fully beyond the end?
+  } else {
+    // Without string compression, we can start treating temp0 as signed
+    // and rely on the signed comparison below.
+    __ Sub(temp0, temp0, 2);
+  }
+
+  // Find the single character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp2, temp_reg);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
+
+  // temp0 = number of characters remaining to compare.
+  // (Without string compression, it could be < 1 if a difference is found by the second CMP
+  // in the comparison loop, and after the end of the shorter string data).
+
+  // Without string compression (temp1 >> 4) = character where difference occurs between the last
+  // two words compared, in the interval [0,1].
+  // (0 for low half-word different, 1 for high half-word different).
+  // With string compression, (temp1 >> 3) = byte where the difference occurs,
+  // in the interval [0,3].
+
+  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+  // the remaining string data, so just return length diff (out).
+  // The comparison is unsigned for string compression, otherwise signed.
+  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
+  __ B((mirror::kUseStringCompression ? ls : le), &end);
+
+  // Extract the characters and calculate the difference.
+  if (mirror::kUseStringCompression) {
+    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
+    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
+    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
+    __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7f800000u
+    __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
+    __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
+    __ Lsr(temp2, temp2, temp1);                        // Extract second character.
+    __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
+    __ Lsr(out, temp_reg, temp1);                       // Extract first character.
+    __ And(temp2, temp2, temp3);
+    __ And(out, out, temp3);
+  } else {
+    // Round the bit position down to a 16-bit boundary, shift the differing characters into
+    // the low half-words and clear the high half-words.
+    __ Bic(temp1, temp1, 0xf);
+    __ Lsr(temp2, temp2, temp1);
+    __ Lsr(out, temp_reg, temp1);
+    __ Movt(temp2, 0);
+    __ Movt(out, 0);
+  }
+
+  __ Sub(out, out, temp2);
+  temps.Release(temp_reg);
+
+  if (mirror::kUseStringCompression) {
+    __ B(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+
+    // We want to free up the temp3, currently holding `str.count`, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we then
+    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
+    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
+    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+    __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    __ Mov(temp1, str);
+    __ Mov(temp2, arg);
+    __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
+    {
+      AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                                 3 * kMaxInstructionSizeInBytes,
+                                 CodeBufferCheckScope::kMaximumSize);
+      __ itt(cs);                             // Interleave with selection of temp1 and temp2.
+      __ mov(cs, temp1, arg);                 // Preserves flags.
+      __ mov(cs, temp2, str);                 // Preserves flags.
+    }
+    __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
+
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ Add(temp1, temp1, value_offset);
+    __ Add(temp2, temp2, value_offset);
+
+    vixl32::Label different_compression_loop;
+    vixl32::Label different_compression_diff;
+
+    // Main loop for different compression.
+    temp_reg = temps.Acquire();
+    __ Bind(&different_compression_loop);
+    __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
+    __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
+    __ Cmp(temp_reg, temp3);
+    __ B(ne, &different_compression_diff);
+    __ Subs(temp0, temp0, 2);
+    __ B(hi, &different_compression_loop);
+    __ B(&end);
+
+    // Calculate the difference.
+    __ Bind(&different_compression_diff);
+    __ Sub(out, temp_reg, temp3);
+    temps.Release(temp_reg);
+    // Flip the difference if the `arg` is compressed.
+    // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
+    __ Lsrs(temp0, temp0, 1u);
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ it(cc);
+    __ rsb(cc, out, out, 0);
+  }
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+// Sets up locations for the String.equals intrinsic (kNoCall): both inputs and the output
+// require registers, and three temps are reserved, the first of which is pinned to R0.
+void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers to store lengths of strings and for calculations.
+  // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
+  locations->AddTemp(LocationFrom(r0));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+}
+
+// Emits inline code for the String.equals intrinsic; the output register receives 1 when the
+// strings are equal and 0 otherwise.
+//
+// Input 0 is the receiver string and input 1 the argument. The generated code checks, in
+// order: null argument (unless statically known non-null), reference equality, class equality
+// (unless the argument is statically known to be a String), equality of the `count` fields,
+// and finally the string data, four bytes per loop iteration.
+void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register str = InputRegisterAt(invoke, 0);
+  vixl32::Register arg = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
+
+  vixl32::Label loop;
+  vixl32::Label end;
+  vixl32::Label return_true;
+  vixl32::Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
+  }
+
+  // Reference equality check, return true if same reference.
+  __ Cmp(str, arg);
+  __ B(eq, &return_true);
+
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str, class_offset));
+    __ Ldr(temp1, MemOperand(arg, class_offset));
+    __ Cmp(temp, temp1);
+    __ B(ne, &return_false);
+  }
+
+  // Load `count` fields of this and argument strings.
+  __ Ldr(temp, MemOperand(str, count_offset));
+  __ Ldr(temp1, MemOperand(arg, count_offset));
+  // Check if `count` fields are equal, return false if they're not.
+  // Also compares the compression style, if differs return false.
+  __ Cmp(temp, temp1);
+  __ B(ne, &return_false);
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
+  __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
+
+  // Assertions that must hold in order to compare strings 4 bytes at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
+
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+    __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ it(cs);                                      // If uncompressed,
+    __ add(cs, temp, temp, temp);                   //   double the byte count.
+  }
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  // Loop to compare strings 4 bytes at a time starting at the front of the string.
+  // Ok to do this because strings are zero-padded to kObjectAlignment.
+  // `out` doubles as a scratch register for the receiver's data until the result is written.
+  __ Bind(&loop);
+  __ Ldr(out, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
+  __ Cmp(out, temp2);
+  __ B(ne, &return_false);
+  // With string compression, we have compared 4 bytes, otherwise 2 chars.
+  __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
+  __ B(hi, &loop);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ Mov(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ Mov(out, 0);
+  __ Bind(&end);
+}
+
+// Emits a runtime call to the kQuickIndexOf entrypoint for the string index-of intrinsics.
+//
+// Input 1 of `invoke` is the code point to search for. Values above 0xFFFF are routed to an
+// intrinsic slow path allocated from `allocator`:
+//  - constant above 0xFFFF: only an unconditional branch to the slow path is emitted;
+//  - constant within range, or an input of type char: no slow path is created;
+//  - anything else: a run-time compare branches to the slow path for values >= 0x10000.
+// When `start_at_zero` is true, temp 0 (expected to be r2) is loaded with 0 as the start index
+// before the call.
+//
+// NOTE(review): `assembler` is presumably what the `__` macro expands to; the macro definition
+// is not visible here.
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+                                       ArmVIXLAssembler* assembler,
+                                       CodeGeneratorARMVIXL* codegen,
+                                       ArenaAllocator* allocator,
+                                       bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
+        std::numeric_limits<uint16_t>::max()) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
+    vixl32::Register char_reg = InputRegisterAt(invoke, 1);
+    // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
+    __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+    slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
+    codegen->AddSlowPath(slow_path);
+    // Unsigned >= comparison.
+    __ B(hs, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
+    DCHECK(tmp_reg.Is(r2));
+    // Start-index = 0.
+    __ Mov(tmp_reg, 0);
+  }
+
+  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
+
+  // The slow path, if any, rejoins the main path after the runtime call.
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));  // Code point.
+  locations->SetOut(LocationFrom(r0));  // Output location is r0.
+
+  // Need to send start-index=0. The code generator moves 0 into this temp before the call.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(  // Shared emitter; it zeroes the start-index temp.
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly. Unlike VisitStringIndexOf, no temp is reserved.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));  // Code point.
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(LocationFrom(r0));  // Output location is r0.
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(  // Shared emitter; no start-index zeroing is emitted.
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;  // Inputs use its registers 0-3.
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));  // Byte array.
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(LocationFrom(r0));  // Output location is r0.
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register byte_array = InputRegisterAt(invoke, 0);
+  __ Cmp(byte_array, 0);  // A null byte array is sent to the slow path below.
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
+  __ Bind(slow_path->GetExitLabel());  // Both paths continue from here.
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainOnly,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;  // Inputs use its registers 0-2.
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(LocationFrom(r0));  // Output location is r0.
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method. No slow path is emitted.
+  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;  // Input uses its register 0.
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(LocationFrom(r0));  // Output location is r0.
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
+  __ Cmp(string_to_copy, 0);  // A null source string is sent to the slow path below.
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
+
+  __ Bind(slow_path->GetExitLabel());  // Both paths continue from here.
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;  // No locations are created for other read barrier configurations.
+  }
+
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  LocationSummary* locations = invoke->GetLocations();
+  if (locations == nullptr) {  // The shared helper created no summary.
+    return;
+  }
+
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();  // Null unless constant.
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  // A constant that cannot always be held by a shifter operand is requested in a register.
+  if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
+    locations->SetInAt(3, Location::RequiresRegister());
+  }
+  if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP cannot be used in
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
+    // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+    // temporary register from the register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+static void CheckPosition(ArmVIXLAssembler* assembler,  // Emits range checks for one array.
+                          Location pos,  // Start position: constant or register.
+                          vixl32::Register input,  // Array whose length field is loaded.
+                          Location length,  // Element count: constant or register.
+                          SlowPathCodeARMVIXL* slow_path,  // Branch target on failure.
+                          vixl32::Register temp,  // Scratch register; overwritten.
+                          bool length_is_input_length = false) {  // Skips length checks.
+  // Where is the length in the Array?
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = Int32ConstantFrom(pos);
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ Ldr(temp, MemOperand(input, length_offset));
+        if (length.IsConstant()) {
+          __ Cmp(temp, Int32ConstantFrom(length));
+        } else {
+          __ Cmp(temp, RegisterFrom(length));
+        }
+        __ B(lt, slow_path->GetEntryLabel());
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ Ldr(temp, MemOperand(input, length_offset));
+      __ Subs(temp, temp, pos_const);  // temp = length(input) - pos.
+      __ B(lt, slow_path->GetEntryLabel());
+
+      // Check that (length(input) - pos) >= length.
+      if (length.IsConstant()) {
+        __ Cmp(temp, Int32ConstantFrom(length));
+      } else {
+        __ Cmp(temp, RegisterFrom(length));
+      }
+      __ B(lt, slow_path->GetEntryLabel());
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    vixl32::Register pos_reg = RegisterFrom(pos);
+    __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    vixl32::Register pos_reg = RegisterFrom(pos);
+    __ Cmp(pos_reg, 0);
+    __ B(lt, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input).
+    __ Ldr(temp, MemOperand(input, length_offset));
+    __ Subs(temp, temp, pos_reg);  // temp = length(input) - pos.
+    __ B(lt, slow_path->GetEntryLabel());
+
+    // Check that (length(input) - pos) >= length.
+    if (length.IsConstant()) {
+      __ Cmp(temp, Int32ConstantFrom(length));
+    } else {
+      __ Cmp(temp, RegisterFrom(length));
+    }
+    __ B(lt, slow_path->GetEntryLabel());
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+  // Emits an inline copy of array references; any failed check branches to the slow path.
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  vixl32::Register src = InputRegisterAt(invoke, 0);
+  Location src_pos = locations->InAt(1);
+  vixl32::Register dest = InputRegisterAt(invoke, 2);
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Location temp1_loc = locations->GetTemp(0);
+  vixl32::Register temp1 = RegisterFrom(temp1_loc);
+  Location temp2_loc = locations->GetTemp(1);
+  vixl32::Register temp2 = RegisterFrom(temp2_loc);
+  Location temp3_loc = locations->GetTemp(2);
+  vixl32::Register temp3 = RegisterFrom(temp3_loc);
+
+  SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
+
+  vixl32::Label conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = Int32ConstantFrom(src_pos);
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ Cmp(src, dest);
+        __ B(eq, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ Cmp(src, dest);
+        __ B(ne, &conditions_on_positions_validated);
+      }
+      __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
+      __ B(gt, intrinsic_slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ Cmp(src, dest);
+      __ B(ne, &conditions_on_positions_validated);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
+      __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
+    } else {
+      __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
+    }
+    __ B(lt, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ Cmp(RegisterFrom(length), 0);
+    __ B(lt, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+        __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such as `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+        __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl32::Label do_copy;
+        __ B(eq, &do_copy);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ Ldr(temp1, MemOperand(temp1, super_offset));
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(ne, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ Ldr(temp1, MemOperand(dest, class_offset));
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ Ldr(temp2, MemOperand(src, class_offset));
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        assembler->MaybeUnpoisonHeapReference(temp1);
+        assembler->MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ Ldr(temp3, MemOperand(temp1, component_offset));
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+        assembler->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ Ldr(temp3, MemOperand(temp2, component_offset));
+        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+        assembler->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl32::Label do_copy;
+        __ B(eq, &do_copy);
+        if (!did_unpoison) {
+          assembler->MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ Ldr(temp1, MemOperand(temp1, component_offset));
+        assembler->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ Ldr(temp1, MemOperand(temp1, super_offset));
+        // No need to unpoison the result, we're comparing against null.
+        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(ne, intrinsic_slow_path->GetEntryLabel());
+      }
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp3` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ Ldr(temp1, MemOperand(src, class_offset));
+      assembler->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ Ldr(temp3, MemOperand(temp1, component_offset));
+      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+      assembler->MaybeUnpoisonHeapReference(temp3);
+    }
+    // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+    __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+    static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+    __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);  // Reference elements.
+  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  // Compute the base source address in `temp1`.
+  if (src_pos.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(src_pos);
+    __ Add(temp1, src, element_size * constant + offset);
+  } else {
+    __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
+    __ Add(temp1, temp1, offset);
+  }
+
+  // Compute the end source address in `temp3`.
+  if (length.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(length);
+    __ Add(temp3, temp1, element_size * constant);
+  } else {
+    __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
+  }
+
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // The base destination address is computed later, as `temp2` is
+    // used for intermediate computations.
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    vixl32::Label loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ Cmp(temp1, temp3);
+    __ B(eq, &done);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ Ldr(temp2, MemOperand(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `src` is unchanged by this operation, but its value now depends
+    // on `temp2`.
+    __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCodeARMVIXL* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+    // Carry flag is the last bit shifted out by LSRS.
+    __ B(cs, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(temp2, dest, element_size * constant + offset);
+    } else {
+      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(temp2, temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+
+    {
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    }
+
+    __ Cmp(temp1, temp3);
+    __ B(ne, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());  // Gray-source slow path exits here.
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(temp2, dest, element_size * constant + offset);
+    } else {
+      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(temp2, temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    vixl32::Label loop, done;
+    __ Cmp(temp1, temp3);  // Don't enter the copy loop when source base == source end.
+    __ B(eq, &done);
+    __ Bind(&loop);
+
+    {
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    }
+
+    __ Cmp(temp1, temp3);
+    __ B(ne, &loop);
+    __ Bind(&done);
+  }
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;  // No LocationSummary is created in this case.
+  }
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI: reserve two core registers to carry the double.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;  // No LocationSummary is created in this case.
+  }
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI: reserve four core registers for the two doubles.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+                          ArmVIXLAssembler* assembler,
+                          CodeGeneratorARMVIXL* codegen,
+                          QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+
+  // Soft float ABI: the double argument travels to native code in a pair of core registers.
+  const vixl32::Register pair_lo = RegisterFrom(locations->GetTemp(0));
+  const vixl32::Register pair_hi = RegisterFrom(locations->GetTemp(1));
+
+  __ Vmov(pair_lo, pair_hi, InputDRegisterAt(invoke, 0));
+  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+
+  // Move the same register pair back into the output D register once the call returns.
+  __ Vmov(OutputDRegister(invoke), pair_lo, pair_hi);
+}
+
+static void GenFPFPToFPCall(HInvoke* invoke,
+                            ArmVIXLAssembler* assembler,
+                            CodeGeneratorARMVIXL* codegen,
+                            QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+
+  // Soft float ABI: each double argument travels to native code in a pair of core registers.
+  const vixl32::Register arg0_lo = RegisterFrom(locations->GetTemp(0));
+  const vixl32::Register arg0_hi = RegisterFrom(locations->GetTemp(1));
+  const vixl32::Register arg1_lo = RegisterFrom(locations->GetTemp(2));
+  const vixl32::Register arg1_hi = RegisterFrom(locations->GetTemp(3));
+
+  __ Vmov(arg0_lo, arg0_hi, InputDRegisterAt(invoke, 0));
+  __ Vmov(arg1_lo, arg1_hi, InputDRegisterAt(invoke, 1));
+  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+
+  // Move the first register pair back into the output D register once the call returns.
+  __ Vmov(OutputDRegister(invoke), arg0_lo, arg0_hi);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  const Location in = locations->InAt(0);
+  const Location out = locations->Out();
+
+  // Reversing the bits of a 64-bit value: bit-reverse each 32-bit half and swap the halves,
+  // i.e. the reversed high input word becomes the low output word and vice versa.
+  __ Rbit(LowRegisterFrom(out), HighRegisterFrom(in));
+  __ Rbit(HighRegisterFrom(out), LowRegisterFrom(in));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  const Location in = locations->InAt(0);
+  const Location out = locations->Out();
+
+  // Reversing the bytes of a 64-bit value: byte-reverse each 32-bit half and swap the halves,
+  // i.e. the reversed high input word becomes the low output word and vice versa.
+  __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
+  __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
+  vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;  // Int: same register twice.
+  vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
+  vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
+  vixl32::Register  out_r = OutputRegister(instr);
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
+  // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
+  __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
+  __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
+  __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
+  if (is_long) {
+    __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
+  }
+  __ Vmov(out_r, tmp_s);            // The low S-reg of the temp D-reg holds the count.
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // Temporary registers for the remaining char count and the source/destination pointers.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Location of data in char array buffer.
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Location of char array data in string.
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+
+  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
+  // Since getChars() calls getCharsNoCheck() - we use registers rather than constants.
+  vixl32::Register srcObj = InputRegisterAt(invoke, 0);
+  vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
+  vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
+  vixl32::Register dstObj = InputRegisterAt(invoke, 3);
+  vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
+
+  vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
+
+  vixl32::Label done, compressed_string_loop;
+  // Address of the first destination char: dstObj + data_offset + dstBegin * 2.
+  __ Add(dst_ptr, dstObj, data_offset);
+  __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
+
+  __ Subs(num_chr, srcEnd, srcBegin);
+  // Early out for valid zero-length retrievals.
+  __ B(eq, &done);
+
+  // Start of the source character data; the srcBegin offset is applied further down.
+  __ Add(src_ptr, srcObj, value_offset);
+
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp;
+  vixl32::Label compressed_string_preloop;
+  if (mirror::kUseStringCompression) {
+    // Location of count in string.
+    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+    temp = temps.Acquire();
+    // Load the count field; a clear bit 0 selects the compressed-string copy loop.
+    __ Ldr(temp, MemOperand(srcObj, count_offset));
+    __ Tst(temp, 1);
+    temps.Release(temp);
+    __ B(eq, &compressed_string_preloop);
+  }
+  __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
+
+  // Do the copy.
+  vixl32::Label loop, remainder;
+
+  temp = temps.Acquire();
+  // Save repairing the value of num_chr on the < 4 character path.
+  __ Subs(temp, num_chr, 4);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
+  __ Mov(num_chr, temp);
+
+  // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
+  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
+  // to rectify these everywhere this intrinsic applies.)
+  __ Bind(&loop);
+  __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
+  __ Subs(num_chr, num_chr, 4);
+  __ Str(temp, MemOperand(dst_ptr, char_size * 2));
+  __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
+  __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
+  temps.Release(temp);
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 4);
+  __ B(eq, &done);
+
+  // Main loop for < 4 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  temp = temps.Acquire();
+  __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
+  temps.Release(temp);
+  __ B(gt, &remainder);
+
+  if (mirror::kUseStringCompression) {
+    __ B(&done);
+
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    // Copy loop for compressed src, widening one 8-bit character to 16 bits at a time.
+    __ Bind(&compressed_string_preloop);
+    __ Add(src_ptr, src_ptr, srcBegin);
+    __ Bind(&compressed_string_loop);
+    temp = temps.Acquire();
+    __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
+    __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
+    temps.Release(temp);
+    __ Subs(num_chr, num_chr, 1);
+    __ B(gt, &compressed_string_loop);
+  }
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmVIXLAssembler* const assembler = GetAssembler();
+  const vixl32::Register out = OutputRegister(invoke);
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
+  __ Vmov(out, InputSRegisterAt(invoke, 0));
+  // Shift out the sign bit so that both infinities match the pre-shifted constant.
+  __ Lsl(out, out, 1);
+  __ Eor(out, out, infinity);
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ Clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmVIXLAssembler* const assembler = GetAssembler();
+  const vixl32::Register out = OutputRegister(invoke);
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  const vixl32::Register temp = temps.Acquire();
+  // The highest 32 bits of double precision positive infinity separated into
+  // two constants encodable as immediate operands.
+  constexpr uint32_t infinity_high  = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
+  __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
+  __ Eor(out, out, infinity_high);
+  __ Eor(out, out, infinity_high2);
+  // Drop the sign bit of the XOR-ed high word and OR in the other word held in temp.
+  __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ Clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil)          // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor)         // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
+
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString)
+
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(ARMVIXL)
+
+#undef __
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
new file mode 100644
index 0000000..6e79cb7
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+
+#include "intrinsics.h"
+#include "utils/arm/assembler_arm_vixl.h"
+
+namespace art {
+
+namespace arm {
+
+class ArmVIXLAssembler;
+class CodeGeneratorARMVIXL;
+
+class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen);
+
+  // Declare one Visit<Name>(HInvoke*) override per entry of INTRINSICS_LIST.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;  // Arena used when allocating LocationSummary objects.
+  ArmVIXLAssembler* assembler_;
+  const ArmInstructionSetFeatures& features_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL);
+};
+
+class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {}
+
+  // Declare one Visit<Name>(HInvoke*) override per entry of INTRINSICS_LIST.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  ArenaAllocator* GetAllocator();
+  ArmVIXLAssembler* GetAssembler();
+
+  CodeGeneratorARMVIXL* codegen_;  // Code generator handed to the helpers that emit runtime calls.
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5239f8f..9b5d7a0 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2495,6 +2495,15 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferLength)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferToString)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderAppend)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderLength)
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderToString)
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 1d153e2..5a99886 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1947,6 +1947,15 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferAppend)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferLength)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferToString)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderAppend)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderLength)
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderToString)
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index aae3899..922c3bc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1408,21 +1408,22 @@
   // compression style is decided on alloc.
   __ cmpl(ecx, Address(arg, count_offset));
   __ j(kNotEqual, &return_false);
+  // Return true if strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
+  __ jecxz(&return_true);
 
   if (mirror::kUseStringCompression) {
     NearLabel string_uncompressed;
-    // Differ cases into both compressed or both uncompressed. Different compression style
-    // is cut above.
-    __ cmpl(ecx, Immediate(0));
-    __ j(kGreaterEqual, &string_uncompressed);
+    // Extract length and differentiate between both compressed or both uncompressed.
+    // Different compression style is cut above.
+    __ shrl(ecx, Immediate(1));
+    __ j(kCarrySet, &string_uncompressed);
     // Divide string length by 2, rounding up, and continue as if uncompressed.
-    // Merge clearing the compression flag (+0x80000000) with +1 for rounding.
-    __ addl(ecx, Immediate(0x80000001));
+    __ addl(ecx, Immediate(1));
     __ shrl(ecx, Immediate(1));
     __ Bind(&string_uncompressed);
   }
-  // Return true if strings are empty.
-  __ jecxz(&return_true);
   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
   __ leal(esi, Address(str, value_offset));
   __ leal(edi, Address(arg, value_offset));
@@ -1535,21 +1536,24 @@
   // Location of count within the String object.
   int32_t count_offset = mirror::String::CountOffset().Int32Value();
 
-  // Load string length, i.e., the count field of the string.
+  // Load the count field of the string containing the length and compression flag.
   __ movl(string_length, Address(string_obj, count_offset));
-  if (mirror::kUseStringCompression) {
-    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
-    __ movl(string_length_flagged, string_length);
-    // Mask out first bit used as compression flag.
-    __ andl(string_length, Immediate(INT32_MAX));
-  }
 
-  // Do a zero-length check.
+  // Do a zero-length check. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
   // TODO: Support jecxz.
   NearLabel not_found_label;
   __ testl(string_length, string_length);
   __ j(kEqual, &not_found_label);
 
+  if (mirror::kUseStringCompression) {
+    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
+    __ movl(string_length_flagged, string_length);
+    // Extract the length and shift out the least significant bit used as compression flag.
+    __ shrl(string_length, Immediate(1));
+  }
+
   if (start_at_zero) {
     // Number of chars to scan is the same as the string length.
     __ movl(counter, string_length);
@@ -1570,8 +1574,8 @@
 
     if (mirror::kUseStringCompression) {
       NearLabel modify_counter, offset_uncompressed_label;
-      __ cmpl(string_length_flagged, Immediate(0));
-      __ j(kGreaterEqual, &offset_uncompressed_label);
+      __ testl(string_length_flagged, Immediate(1));
+      __ j(kNotZero, &offset_uncompressed_label);
       // Move to the start of the string: string_obj + value_offset + start_index.
       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
       __ jmp(&modify_counter);
@@ -1593,8 +1597,8 @@
   if (mirror::kUseStringCompression) {
     NearLabel uncompressed_string_comparison;
     NearLabel comparison_done;
-    __ cmpl(string_length_flagged, Immediate(0));
-    __ j(kGreater, &uncompressed_string_comparison);
+    __ testl(string_length_flagged, Immediate(1));
+    __ j(kNotZero, &uncompressed_string_comparison);
 
     // Check if EAX (search_value) is ASCII.
     __ cmpl(search_value, Immediate(127));
@@ -1787,8 +1791,10 @@
     __ cfi().AdjustCFAOffset(stack_adjust);
 
     NearLabel copy_loop, copy_uncompressed;
-    __ cmpl(Address(obj, count_offset), Immediate(0));
-    __ j(kGreaterEqual, &copy_uncompressed);
+    __ testl(Address(obj, count_offset), Immediate(1));
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ j(kNotZero, &copy_uncompressed);
     // Compute the address of the source string by adding the number of chars from
     // the source beginning to the value offset of a string.
     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
@@ -3200,7 +3206,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       for (size_t i = 0; i != length; ++i) {
@@ -3222,14 +3228,13 @@
     __ j(kEqual, &done);
 
     // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-    // if (rb_state == ReadBarrier::gray_ptr_)
+    // if (rb_state == ReadBarrier::GrayState())
     //   goto slow_path;
     // At this point, just do the "if" and make sure that flags are preserved until the branch.
     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -3324,6 +3329,15 @@
 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
 
+UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index cdef22f..05d270a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1399,7 +1399,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       do {
@@ -1420,14 +1420,13 @@
     __ j(kEqual, &done);
 
     // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-    // if (rb_state == ReadBarrier::gray_ptr_)
+    // if (rb_state == ReadBarrier::GrayState())
     //   goto slow_path;
     // At this point, just do the "if" and make sure that flags are preserved until the branch.
     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -1575,20 +1574,23 @@
   // compression style is decided on alloc.
   __ cmpl(rcx, Address(arg, count_offset));
   __ j(kNotEqual, &return_false);
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
+  __ jrcxz(&return_true);
 
   if (mirror::kUseStringCompression) {
     NearLabel string_uncompressed;
-    // Both string are compressed.
-    __ cmpl(rcx, Immediate(0));
-    __ j(kGreaterEqual, &string_uncompressed);
+    // Extract length and differentiate between both compressed or both uncompressed.
+    // Different compression style is cut above.
+    __ shrl(rcx, Immediate(1));
+    __ j(kCarrySet, &string_uncompressed);
     // Divide string length by 2, rounding up, and continue as if uncompressed.
     // Merge clearing the compression flag with +1 for rounding.
-    __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001)));
+    __ addl(rcx, Immediate(1));
     __ shrl(rcx, Immediate(1));
     __ Bind(&string_uncompressed);
   }
-  // Return true if both strings are empty.
-  __ jrcxz(&return_true);
   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
   __ leal(rsi, Address(str, value_offset));
   __ leal(rdi, Address(arg, value_offset));
@@ -1695,21 +1697,22 @@
   // Location of count within the String object.
   int32_t count_offset = mirror::String::CountOffset().Int32Value();
 
-  // Load string length, i.e., the count field of the string.
+  // Load the count field of the string containing the length and compression flag.
   __ movl(string_length, Address(string_obj, count_offset));
-  if (mirror::kUseStringCompression) {
-    // Use TMP to keep string_length_flagged.
-    __ movl(CpuRegister(TMP), string_length);
-    // Mask out first bit used as compression flag.
-    __ andl(string_length, Immediate(INT32_MAX));
-  }
 
-  // Do a length check.
+  // Do a zero-length check. Even with string compression `count == 0` means empty.
   // TODO: Support jecxz.
   NearLabel not_found_label;
   __ testl(string_length, string_length);
   __ j(kEqual, &not_found_label);
 
+  if (mirror::kUseStringCompression) {
+    // Use TMP to keep string_length_flagged.
+    __ movl(CpuRegister(TMP), string_length);
+    // Shift out the low bit used as compression flag, leaving the character count.
+    __ shrl(string_length, Immediate(1));
+  }
+
   if (start_at_zero) {
     // Number of chars to scan is the same as the string length.
     __ movl(counter, string_length);
@@ -1729,8 +1732,8 @@
 
     if (mirror::kUseStringCompression) {
       NearLabel modify_counter, offset_uncompressed_label;
-      __ cmpl(CpuRegister(TMP), Immediate(0));
-      __ j(kGreaterEqual, &offset_uncompressed_label);
+      __ testl(CpuRegister(TMP), Immediate(1));
+      __ j(kNotZero, &offset_uncompressed_label);
       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
       __ jmp(&modify_counter);
       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
@@ -1748,8 +1751,8 @@
   if (mirror::kUseStringCompression) {
     NearLabel uncompressed_string_comparison;
     NearLabel comparison_done;
-    __ cmpl(CpuRegister(TMP), Immediate(0));
-    __ j(kGreater, &uncompressed_string_comparison);
+    __ testl(CpuRegister(TMP), Immediate(1));
+    __ j(kNotZero, &uncompressed_string_comparison);
     // Check if RAX (search_value) is ASCII.
     __ cmpl(search_value, Immediate(127));
     __ j(kGreater, &not_found_label);
@@ -1932,8 +1935,10 @@
     // Location of count in string.
     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
 
-    __ cmpl(Address(obj, count_offset), Immediate(0));
-    __ j(kGreaterEqual, &copy_uncompressed);
+    __ testl(Address(obj, count_offset), Immediate(1));
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ j(kNotZero, &copy_uncompressed);
     // Compute the address of the source string by adding the number of chars from
     // the source beginning to the value offset of a string.
     __ leaq(CpuRegister(RSI),
@@ -2993,6 +2998,15 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
+UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index eb2d18d..f0086fb 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -120,17 +120,17 @@
       }
       DCHECK(!loop_info->IsIrreducible());
 
-      // We can move an instruction that can throw only if it is the first
-      // throwing instruction in the loop. Note that the first potentially
-      // throwing instruction encountered that is not hoisted stops this
-      // optimization. Non-throwing instruction can still be hoisted.
-      bool found_first_non_hoisted_throwing_instruction_in_loop = !inner->IsLoopHeader();
+      // We can move an instruction that can throw only as long as it is the first visible
+      // instruction (throw or write) in the loop. Note that the first potentially visible
+      // instruction that is not hoisted stops this optimization. Non-throwing instructions,
+      // on the other hand, can still be hoisted.
+      bool found_first_non_hoisted_visible_instruction_in_loop = !inner->IsLoopHeader();
       for (HInstructionIterator inst_it(inner->GetInstructions());
            !inst_it.Done();
            inst_it.Advance()) {
         HInstruction* instruction = inst_it.Current();
         if (instruction->CanBeMoved()
-            && (!instruction->CanThrow() || !found_first_non_hoisted_throwing_instruction_in_loop)
+            && (!instruction->CanThrow() || !found_first_non_hoisted_visible_instruction_in_loop)
             && !instruction->GetSideEffects().MayDependOn(loop_effects)
             && InputsAreDefinedBeforeLoop(instruction)) {
           // We need to update the environment if the instruction has a loop header
@@ -142,10 +142,10 @@
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
           MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
-        } else if (instruction->CanThrow()) {
-          // If `instruction` can throw, we cannot move further instructions
-          // that can throw as well.
-          found_first_non_hoisted_throwing_instruction_in_loop = true;
+        } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) {
+          // If `instruction` can do something visible (throw or write),
+          // we cannot move further instructions that can throw.
+          found_first_non_hoisted_visible_instruction_in_loop = true;
         }
       }
     }
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 2a62643..8c34dc6 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -63,7 +63,10 @@
     return_->AddSuccessor(exit_);
 
     // Provide boiler-plate instructions.
-    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
+    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                   dex::TypeIndex(0),
+                                                   0,
+                                                   Primitive::kPrimNot);
     entry_->AddInstruction(parameter_);
     int_constant_ = graph_->GetIntConstant(42);
     float_constant_ = graph_->GetFloatConstant(42.0f);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index b91e9e6..2856c3e 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -15,6 +15,8 @@
  */
 
 #include "load_store_elimination.h"
+
+#include "escape.h"
 #include "side_effects_analysis.h"
 
 #include <iostream>
@@ -31,54 +33,17 @@
 // whether it's a singleton, returned, etc.
 class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
  public:
-  ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos) {
-    is_singleton_ = true;
-    is_singleton_and_not_returned_ = true;
-    if (!reference_->IsNewInstance() && !reference_->IsNewArray()) {
-      // For references not allocated in the method, don't assume anything.
-      is_singleton_ = false;
-      is_singleton_and_not_returned_ = false;
-      return;
-    }
-
-    // Visit all uses to determine if this reference can spread into the heap,
-    // a method call, etc.
-    for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) {
-      HInstruction* user = use.GetUser();
-      DCHECK(!user->IsNullCheck()) << "NullCheck should have been eliminated";
-      if (user->IsBoundType()) {
-        // BoundType shouldn't normally be necessary for a NewInstance.
-        // Just be conservative for the uncommon cases.
-        is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
-        return;
-      }
-      if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
-          (user->IsInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
-          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
-          (user->IsStaticFieldSet() && (reference_ == user->InputAt(1))) ||
-          (user->IsUnresolvedStaticFieldSet() && (reference_ == user->InputAt(0))) ||
-          (user->IsArraySet() && (reference_ == user->InputAt(2)))) {
-        // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
-        // reference_ isn't the only name that can refer to its value anymore.
-        is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
-        return;
-      }
-      if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) ||
-          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) {
-        // The field is accessed in an unresolved way. We mark the object as a singleton to
-        // disable load/store optimizations on it.
-        // Note that we could optimize this case and still perform some optimizations until
-        // we hit the unresolved access, but disabling is the simplest.
-        is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
-        return;
-      }
-      if (user->IsReturn()) {
-        is_singleton_and_not_returned_ = false;
-      }
-    }
+  ReferenceInfo(HInstruction* reference, size_t pos)
+      : reference_(reference),
+        position_(pos),
+        is_singleton_(true),
+        is_singleton_and_not_returned_(true),
+        is_singleton_and_not_deopt_visible_(true) {
+    CalculateEscape(reference_,
+                    nullptr,
+                    &is_singleton_,
+                    &is_singleton_and_not_returned_,
+                    &is_singleton_and_not_deopt_visible_);
   }
 
   HInstruction* GetReference() const {
@@ -96,17 +61,20 @@
     return is_singleton_;
   }
 
-  // Returns true if reference_ is a singleton and not returned to the caller.
+  // Returns true if reference_ is a singleton and not returned to the caller or
+  // used as an environment local of an HDeoptimize instruction.
   // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndNotReturned() const {
-    return is_singleton_and_not_returned_;
+  bool IsSingletonAndRemovable() const {
+    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
   }
 
  private:
   HInstruction* const reference_;
-  const size_t position_;     // position in HeapLocationCollector's ref_info_array_.
-  bool is_singleton_;         // can only be referred to by a single name in the method.
-  bool is_singleton_and_not_returned_;  // reference_ is singleton and not returned to caller.
+  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
+
+  bool is_singleton_;                        // can only be referred to by a single name in the method,
+  bool is_singleton_and_not_returned_;       // and not returned to caller,
+  bool is_singleton_and_not_deopt_visible_;  // and not used as an environment local of HDeoptimize.
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -202,8 +170,7 @@
                          kArenaAllocLSE),
         has_heap_stores_(false),
         has_volatile_(false),
-        has_monitor_operations_(false),
-        may_deoptimize_(false) {}
+        has_monitor_operations_(false) {}
 
   size_t GetNumberOfHeapLocations() const {
     return heap_locations_.size();
@@ -236,13 +203,6 @@
     return has_monitor_operations_;
   }
 
-  // Returns whether this method may be deoptimized.
-  // Currently we don't have meta data support for deoptimizing
-  // a method that eliminates allocations/stores.
-  bool MayDeoptimize() const {
-    return may_deoptimize_;
-  }
-
   // Find and return the heap location index in heap_locations_.
   size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
                                size_t offset,
@@ -493,10 +453,6 @@
     CreateReferenceInfoForReferenceType(instruction);
   }
 
-  void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
-    may_deoptimize_ = true;
-  }
-
   void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
     has_monitor_operations_ = true;
   }
@@ -508,7 +464,6 @@
                             // alias analysis and won't be as effective.
   bool has_volatile_;       // If there are volatile field accesses.
   bool has_monitor_operations_;    // If there are monitor operations.
-  bool may_deoptimize_;     // Only true for HDeoptimize with single-frame deoptimization.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
@@ -663,27 +618,59 @@
     if (predecessors.size() == 0) {
       return;
     }
+
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
-      HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
-      heap_values[i] = pred0_value;
-      if (pred0_value != kUnknownHeapValue) {
-        for (size_t j = 1; j < predecessors.size(); j++) {
-          HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
-          if (pred_value != pred0_value) {
-            heap_values[i] = kUnknownHeapValue;
-            break;
-          }
+      HInstruction* merged_value = nullptr;
+      // Whether merged_value is a result that's merged from all predecessors.
+      bool from_all_predecessors = true;
+      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+      HInstruction* singleton_ref = nullptr;
+      if (ref_info->IsSingletonAndRemovable()) {
+        // We do more analysis of liveness when merging heap values for such
+        // cases since stores into such references may potentially be eliminated.
+        singleton_ref = ref_info->GetReference();
+      }
+
+      for (HBasicBlock* predecessor : predecessors) {
+        HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i];
+        if ((singleton_ref != nullptr) &&
+            !singleton_ref->GetBlock()->Dominates(predecessor)) {
+          // singleton_ref is not live in this predecessor. Skip this predecessor since
+          // it does not really have the location.
+          DCHECK_EQ(pred_value, kUnknownHeapValue);
+          from_all_predecessors = false;
+          continue;
+        }
+        if (merged_value == nullptr) {
+          // First seen heap value.
+          merged_value = pred_value;
+        } else if (pred_value != merged_value) {
+          // There are conflicting values.
+          merged_value = kUnknownHeapValue;
+          break;
         }
       }
 
-      if (heap_values[i] == kUnknownHeapValue) {
+      if (merged_value == kUnknownHeapValue) {
+        // There are conflicting heap values from different predecessors.
         // Keep the last store in each predecessor since future loads cannot be eliminated.
-        for (size_t j = 0; j < predecessors.size(); j++) {
-          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+        for (HBasicBlock* predecessor : predecessors) {
+          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()];
           KeepIfIsStore(pred_values[i]);
         }
       }
+
+      if ((merged_value == nullptr) || !from_all_predecessors) {
+        DCHECK(singleton_ref != nullptr);
+        DCHECK((singleton_ref->GetBlock() == block) ||
+               !singleton_ref->GetBlock()->Dominates(block));
+        // singleton_ref is not defined before block or defined only in some of its
+        // predecessors, so block doesn't really have the location at its entry.
+        heap_values[i] = kUnknownHeapValue;
+      } else {
+        heap_values[i] = merged_value;
+      }
     }
   }
 
@@ -812,8 +799,7 @@
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
-    } else if (!heap_location_collector_.MayDeoptimize() &&
-               ref_info->IsSingletonAndNotReturned()) {
+    } else if (ref_info->IsSingletonAndRemovable()) {
       // Store into a field of a singleton that's not returned. The value cannot be
       // killed due to aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
@@ -987,8 +973,7 @@
       // new_instance isn't used for field accesses. No need to process it.
       return;
     }
-    if (!heap_location_collector_.MayDeoptimize() &&
-        ref_info->IsSingletonAndNotReturned() &&
+    if (ref_info->IsSingletonAndRemovable() &&
         !new_instance->IsFinalizable() &&
         !new_instance->NeedsAccessCheck()) {
       singleton_new_instances_.push_back(new_instance);
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index d157509..a9fe209 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -16,11 +16,16 @@
 
 #include "locations.h"
 
+#include <type_traits>
+
 #include "nodes.h"
 #include "code_generator.h"
 
 namespace art {
 
+// Verify that Location is trivially copyable.
+static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable");
+
 LocationSummary::LocationSummary(HInstruction* instruction,
                                  CallKind call_kind,
                                  bool intrinsified)
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index da27928..091b58a 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -91,12 +91,9 @@
     DCHECK(!IsValid());
   }
 
-  Location(const Location& other) : value_(other.value_) {}
+  Location(const Location& other) = default;
 
-  Location& operator=(const Location& other) {
-    value_ = other.value_;
-    return *this;
-  }
+  Location& operator=(const Location& other) = default;
 
   bool IsConstant() const {
     return (value_ & kLocationConstantMask) == kConstant;
@@ -328,7 +325,6 @@
         LOG(FATAL) << "Should not use this location kind";
     }
     UNREACHABLE();
-    return "?";
   }
 
   // Unallocated locations.
@@ -529,6 +525,12 @@
     temps_.push_back(location);
   }
 
+  void AddRegisterTemps(size_t count) {
+    for (size_t i = 0; i < count; ++i) {
+      AddTemp(Location::RequiresRegister());
+    }
+  }
+
   Location GetTemp(uint32_t at) const {
     return temps_[at];
   }
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 51be1d1..f4616e3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -28,6 +28,30 @@
   instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
 }
 
+// Detects a goto block (one predecessor, one successor); on success sets *succ to the successor.
+static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) {
+  if (block->GetPredecessors().size() == 1 &&
+      block->GetSuccessors().size() == 1 &&
+      block->IsSingleGoto()) {
+    *succ = block->GetSingleSuccessor();
+    return true;
+  }
+  return false;
+}
+
+// Detects an early exit: a block other than the first (in RPO) has a successor outside the loop.
+static bool IsEarlyExit(HLoopInformation* loop_info) {
+  HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
+  for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+      if (!loop_info->Contains(*successor)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 //
 // Class methods.
 //
@@ -168,7 +192,9 @@
     int32_t use_count = 0;
     if (IsPhiInduction(phi) &&
         IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
-        TryReplaceWithLastValue(phi, use_count, preheader)) {
+        // No uses, or no early-exit with proper replacement.
+        (use_count == 0 ||
+         (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
       for (HInstruction* i : *iset_) {
         RemoveFromCycle(i);
       }
@@ -178,31 +204,57 @@
 }
 
 void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
-  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    // Remove instructions that are dead.
-    for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
-      HInstruction* instruction = i.Current();
-      if (instruction->IsDeadAndRemovable()) {
-        block->RemoveInstruction(instruction);
+  // Repeat the block simplifications until no more changes occur. Note that since
+  // each simplification consists of eliminating code (without introducing new code),
+  // this process is always finite.
+  bool changed;
+  do {
+    changed = false;
+    // Iterate over all basic blocks in the loop-body.
+    for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      // Remove dead instructions from the loop-body.
+      for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
+        HInstruction* instruction = i.Current();
+        if (instruction->IsDeadAndRemovable()) {
+          changed = true;
+          block->RemoveInstruction(instruction);
+        }
       }
-    }
-    // Remove trivial control flow blocks from the loop-body.
-    if (block->GetPredecessors().size() == 1 &&
-        block->GetSuccessors().size() == 1 &&
-        block->GetFirstInstruction()->IsGoto()) {
-      HBasicBlock* pred = block->GetSinglePredecessor();
-      HBasicBlock* succ = block->GetSingleSuccessor();
-      if (succ->GetPredecessors().size() == 1) {
+      // Remove trivial control flow blocks from the loop-body.
+      HBasicBlock* succ = nullptr;
+      if (IsGotoBlock(block, &succ) && succ->GetPredecessors().size() == 1) {
+        // Trivial goto block can be removed.
+        HBasicBlock* pred = block->GetSinglePredecessor();
+        changed = true;
         pred->ReplaceSuccessor(block, succ);
-        block->ClearDominanceInformation();
-        block->SetDominator(pred);  // needed by next disconnect.
+        block->RemoveDominatedBlock(succ);
         block->DisconnectAndDelete();
         pred->AddDominatedBlock(succ);
         succ->SetDominator(pred);
+      } else if (block->GetSuccessors().size() == 2) {
+        // Trivial if block can be bypassed to either branch.
+        HBasicBlock* succ0 = block->GetSuccessors()[0];
+        HBasicBlock* succ1 = block->GetSuccessors()[1];
+        HBasicBlock* meet0 = nullptr;
+        HBasicBlock* meet1 = nullptr;
+        if (succ0 != succ1 &&
+            IsGotoBlock(succ0, &meet0) &&
+            IsGotoBlock(succ1, &meet1) &&
+            meet0 == meet1 &&  // meets again
+            meet0 != block &&  // no self-loop
+            meet0->GetPhis().IsEmpty()) {  // not used for merging
+          changed = true;
+          succ0->DisconnectAndDelete();
+          if (block->Dominates(meet0)) {
+            block->RemoveDominatedBlock(meet0);
+            succ1->AddDominatedBlock(meet0);
+            meet0->SetDominator(succ1);
+          }
+        }
       }
     }
-  }
+  } while (changed);
 }
 
 void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
@@ -240,12 +292,12 @@
   if (IsEmptyHeader(header) &&
       IsEmptyBody(body) &&
       IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
-      TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
+      // No uses, or proper replacement.
+      (use_count == 0 || TryReplaceWithLastValue(header->GetFirstPhi(), preheader))) {
     body->DisconnectAndDelete();
     exit->RemovePredecessor(header);
     header->RemoveSuccessor(exit);
-    header->ClearDominanceInformation();
-    header->SetDominator(preheader);  // needed by next disconnect.
+    header->RemoveDominatedBlock(exit);
     header->DisconnectAndDelete();
     preheader->AddSuccessor(exit);
     preheader->AddInstruction(new (graph_->GetArena()) HGoto());  // global allocator
@@ -259,22 +311,23 @@
 bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
   ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
   if (set != nullptr) {
+    DCHECK(iset_->empty());
     for (HInstruction* i : *set) {
-      // Check that, other than phi, instruction are removable with uses contained in the cycle.
-      // TODO: investigate what cases are no longer in the graph.
-      if (i != phi) {
-        if (!i->IsInBlock() || !i->IsRemovable()) {
-          return false;
-        }
+      // Check that, other than instructions that are no longer in the graph (removed earlier),
+      // each instruction is removable and, other than the phi, uses are contained in the cycle.
+      if (!i->IsInBlock()) {
+        continue;
+      } else if (!i->IsRemovable()) {
+        return false;
+      } else if (i != phi) {
         for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
           if (set->find(use.GetUser()) == set->end()) {
             return false;
           }
         }
       }
+      iset_->insert(i);  // copy
     }
-    DCHECK(iset_->empty());
-    iset_->insert(set->begin(), set->end());  // copy
     return true;
   }
   return false;
@@ -358,20 +411,16 @@
   }
 }
 
-bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction,
-                                                int32_t use_count,
-                                                HBasicBlock* block) {
-  // If true uses appear after the loop, replace these uses with the last value. Environment
-  // uses can consume this value too, since any first true use is outside the loop (although
-  // this may imply that de-opting may look "ahead" a bit on the phi value). If there are only
-  // environment uses, the value is dropped altogether, since the computations have no effect.
-  if (use_count > 0) {
-    if (!induction_range_.CanGenerateLastValue(instruction)) {
-      return false;
-    }
+bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) {
+  // Try to replace outside uses with the last value. Environment uses can consume this
+  // value too, since any first true use is outside the loop (although this may imply
+  // that de-opting may look "ahead" a bit on the phi value). If there are only environment
+  // uses, the value is dropped altogether, since the computations have no effect.
+  if (induction_range_.CanGenerateLastValue(instruction)) {
     ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block));
+    return true;
   }
-  return true;
+  return false;
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index e18d175..3391bef 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -72,9 +72,7 @@
                            HInstruction* instruction,
                            /*out*/ int32_t* use_count);
   void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
-  bool TryReplaceWithLastValue(HInstruction* instruction,
-                               int32_t use_count,
-                               HBasicBlock* block);
+  bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
 
   // Range information based on prior induction variable analysis.
   InductionVarRange induction_range_;
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 7805a69..9a6b493 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -48,7 +48,10 @@
     graph_->AddBlock(exit_block_);
     graph_->SetEntryBlock(entry_block_);
     graph_->SetExitBlock(exit_block_);
-    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                   dex::TypeIndex(0),
+                                                   0,
+                                                   Primitive::kPrimInt);
     entry_block_->AddInstruction(parameter_);
     return_block_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_block_->AddInstruction(new (&allocator_) HExit());
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 45c7eb1..594255c 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -735,6 +735,20 @@
   return true;
 }
 
+
+bool HLoopInformation::HasExitEdge() const {
+  // Determine if this loop has at least one exit edge.
+  HBlocksInLoopReversePostOrderIterator it_loop(*this);
+  for (; !it_loop.Done(); it_loop.Advance()) {
+    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+      if (!Contains(*successor)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -1432,10 +1446,10 @@
   AddInstruction(new (GetGraph()->GetArena()) HGoto(new_block->GetDexPc()));
 
   for (HBasicBlock* successor : GetSuccessors()) {
-    new_block->successors_.push_back(successor);
     successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.clear();
+  new_block->successors_.swap(successors_);
+  DCHECK(successors_.empty());
   AddSuccessor(new_block);
 
   GetGraph()->AddBlock(new_block);
@@ -1449,10 +1463,10 @@
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
 
   for (HBasicBlock* predecessor : GetPredecessors()) {
-    new_block->predecessors_.push_back(predecessor);
     predecessor->successors_[predecessor->GetSuccessorIndexOf(this)] = new_block;
   }
-  predecessors_.clear();
+  new_block->predecessors_.swap(predecessors_);
+  DCHECK(predecessors_.empty());
   AddPredecessor(new_block);
 
   GetGraph()->AddBlock(new_block);
@@ -1477,16 +1491,16 @@
   new_block->instructions_.SetBlockOfInstructions(new_block);
 
   for (HBasicBlock* successor : GetSuccessors()) {
-    new_block->successors_.push_back(successor);
     successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.clear();
+  new_block->successors_.swap(successors_);
+  DCHECK(successors_.empty());
 
   for (HBasicBlock* dominated : GetDominatedBlocks()) {
     dominated->dominator_ = new_block;
-    new_block->dominated_blocks_.push_back(dominated);
   }
-  dominated_blocks_.clear();
+  new_block->dominated_blocks_.swap(dominated_blocks_);
+  DCHECK(dominated_blocks_.empty());
   return new_block;
 }
 
@@ -1504,16 +1518,16 @@
 
   new_block->instructions_.SetBlockOfInstructions(new_block);
   for (HBasicBlock* successor : GetSuccessors()) {
-    new_block->successors_.push_back(successor);
     successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.clear();
+  new_block->successors_.swap(successors_);
+  DCHECK(successors_.empty());
 
   for (HBasicBlock* dominated : GetDominatedBlocks()) {
     dominated->dominator_ = new_block;
-    new_block->dominated_blocks_.push_back(dominated);
   }
-  dominated_blocks_.clear();
+  new_block->dominated_blocks_.swap(dominated_blocks_);
+  DCHECK(dominated_blocks_.empty());
   return new_block;
 }
 
@@ -1852,17 +1866,19 @@
 
   // Update links to the successors of `other`.
   successors_.clear();
-  while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessors()[0];
-    successor->ReplacePredecessor(other, this);
+  for (HBasicBlock* successor : other->GetSuccessors()) {
+    successor->predecessors_[successor->GetPredecessorIndexOf(other)] = this;
   }
+  successors_.swap(other->successors_);
+  DCHECK(other->successors_.empty());
 
   // Update the dominator tree.
   RemoveDominatedBlock(other);
   for (HBasicBlock* dominated : other->GetDominatedBlocks()) {
-    dominated_blocks_.push_back(dominated);
     dominated->SetDominator(this);
   }
+  dominated_blocks_.insert(
+      dominated_blocks_.end(), other->dominated_blocks_.begin(), other->dominated_blocks_.end());
   other->dominated_blocks_.clear();
   other->dominator_ = nullptr;
 
@@ -1889,16 +1905,18 @@
 
   // Update links to the successors of `other`.
   successors_.clear();
-  while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessors()[0];
-    successor->ReplacePredecessor(other, this);
+  for (HBasicBlock* successor : other->GetSuccessors()) {
+    successor->predecessors_[successor->GetPredecessorIndexOf(other)] = this;
   }
+  successors_.swap(other->successors_);
+  DCHECK(other->successors_.empty());
 
   // Update the dominator tree.
   for (HBasicBlock* dominated : other->GetDominatedBlocks()) {
-    dominated_blocks_.push_back(dominated);
     dominated->SetDominator(this);
   }
+  dominated_blocks_.insert(
+      dominated_blocks_.end(), other->dominated_blocks_.begin(), other->dominated_blocks_.end());
   other->dominated_blocks_.clear();
   other->dominator_ = nullptr;
   other->graph_ = nullptr;
@@ -2521,12 +2539,12 @@
       return os << "BootImageLinkTimePcRelative";
     case HLoadString::LoadKind::kBootImageAddress:
       return os << "BootImageAddress";
-    case HLoadString::LoadKind::kDexCacheAddress:
-      return os << "DexCacheAddress";
     case HLoadString::LoadKind::kBssEntry:
       return os << "BssEntry";
     case HLoadString::LoadKind::kDexCacheViaMethod:
       return os << "DexCacheViaMethod";
+    case HLoadString::LoadKind::kJitTableAddress:
+      return os << "JitTableAddress";
     default:
       LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6a45149..e3f4d8f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -29,6 +29,7 @@
 #include "base/stl_util.h"
 #include "base/transform_array_ref.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -332,7 +333,8 @@
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_current_method_(nullptr),
         inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
-        osr_(osr) {
+        osr_(osr),
+        cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -535,6 +537,20 @@
 
   bool IsCompilingOsr() const { return osr_; }
 
+  ArenaSet<ArtMethod*>& GetCHASingleImplementationList() {
+    return cha_single_implementation_list_;
+  }
+
+  void AddCHASingleImplementationDependency(ArtMethod* method) {
+    cha_single_implementation_list_.insert(method);
+  }
+
+  bool HasShouldDeoptimizeFlag() const {
+    // TODO: if all CHA guards can be eliminated, there is no need for the flag
+    // even if cha_single_implementation_list_ is not empty.
+    return !cha_single_implementation_list_.empty();
+  }
+
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
@@ -671,6 +687,9 @@
   // compiled code entries which the interpreter can directly jump to.
   const bool osr_;
 
+  // List of methods that are assumed to have a single implementation.
+  ArenaSet<ArtMethod*> cha_single_implementation_list_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   friend class HInliner;             // For the reverse post order.
@@ -769,6 +788,8 @@
 
   bool DominatesAllBackEdges(HBasicBlock* block);
 
+  bool HasExitEdge() const;
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -798,7 +819,7 @@
   }
 
   // Catch block information constructor.
-  TryCatchInformation(uint16_t catch_type_index, const DexFile& dex_file)
+  TryCatchInformation(dex::TypeIndex catch_type_index, const DexFile& dex_file)
       : try_entry_(nullptr),
         catch_dex_file_(&dex_file),
         catch_type_index_(catch_type_index) {}
@@ -814,10 +835,10 @@
 
   bool IsCatchAllTypeIndex() const {
     DCHECK(IsCatchBlock());
-    return catch_type_index_ == DexFile::kDexNoIndex16;
+    return !catch_type_index_.IsValid();
   }
 
-  uint16_t GetCatchTypeIndex() const {
+  dex::TypeIndex GetCatchTypeIndex() const {
     DCHECK(IsCatchBlock());
     return catch_type_index_;
   }
@@ -834,7 +855,7 @@
 
   // Exception type information. Only set for catch blocks.
   const DexFile* catch_dex_file_;
-  const uint16_t catch_type_index_;
+  const dex::TypeIndex catch_type_index_;
 };
 
 static constexpr size_t kNoLifetime = -1;
@@ -1237,6 +1258,7 @@
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
   M(CurrentMethod, Instruction)                                         \
+  M(ShouldDeoptimizeFlag, Instruction)                                  \
   M(Deoptimize, Instruction)                                            \
   M(Div, BinaryOperation)                                               \
   M(DivZeroCheck, Instruction)                                          \
@@ -1855,6 +1877,15 @@
   size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  bool HasInput(HInstruction* input) const {
+    for (const HInstruction* i : GetInputs()) {
+      if (i == input) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   void SetRawInputAt(size_t index, HInstruction* input) {
     SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
   }
@@ -1947,7 +1978,7 @@
 
   bool IsRemovable() const {
     return
-        !HasSideEffects() &&
+        !DoesAnyWrite() &&
         !CanThrow() &&
         !IsSuspendCheck() &&
         !IsControlFlow() &&
@@ -2060,6 +2091,8 @@
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
+  // TODO: this method is used by both LICM and GVN, possibly with different
+  //       meanings. Consider splitting and renaming it.
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
@@ -2861,6 +2894,27 @@
   DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
 };
 
+// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
+// The compiled code checks the value of this flag in a guard before a devirtualized
+// call and, if it is true, starts deoptimization.
+// It has a 4-byte slot on the stack.
+// TODO: allocate a register for this flag.
+class HShouldDeoptimizeFlag FINAL : public HExpression<0> {
+ public:
+  // TODO: use SideEffects to aid eliminating some CHA guards.
+  explicit HShouldDeoptimizeFlag(uint32_t dex_pc)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+  }
+
+  // We don't eliminate CHA guards yet.
+  bool CanBeMoved() const OVERRIDE { return false; }
+
+  DECLARE_INSTRUCTION(ShouldDeoptimizeFlag);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShouldDeoptimizeFlag);
+};
+
 // Represents the ArtMethod that was passed as a first argument to
 // the method. It is used by instructions that depend on it, like
 // instructions that work with the dex cache.
@@ -3660,7 +3714,7 @@
   HNewInstance(HInstruction* cls,
                HCurrentMethod* current_method,
                uint32_t dex_pc,
-               uint16_t type_index,
+               dex::TypeIndex type_index,
                const DexFile& dex_file,
                bool needs_access_check,
                bool finalizable,
@@ -3675,7 +3729,7 @@
     SetRawInputAt(1, current_method);
   }
 
-  uint16_t GetTypeIndex() const { return type_index_; }
+  dex::TypeIndex GetTypeIndex() const { return type_index_; }
   const DexFile& GetDexFile() const { return dex_file_; }
 
   // Calls runtime so needs an environment.
@@ -3708,7 +3762,7 @@
   static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits,
                 "Too many packed fields.");
 
-  const uint16_t type_index_;
+  const dex::TypeIndex type_index_;
   const DexFile& dex_file_;
   QuickEntrypointEnum entrypoint_;
 
@@ -3773,9 +3827,11 @@
     return GetEnvironment()->IsFromInlinedInvoke();
   }
 
+  void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
+
   bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
 
-  bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
+  bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); }
 
   bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
@@ -3831,8 +3887,6 @@
     SetPackedFlag<kFlagCanThrow>(true);
   }
 
-  void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
-
   uint32_t number_of_arguments_;
   ArtMethod* const resolved_method_;
   ArenaVector<HUserRecord<HInstruction*>> inputs_;
@@ -4169,6 +4223,19 @@
                 kVirtual),
         vtable_index_(vtable_index) {}
 
+  bool CanBeNull() const OVERRIDE {
+    switch (GetIntrinsic()) {
+      case Intrinsics::kThreadCurrentThread:
+      case Intrinsics::kStringBufferAppend:
+      case Intrinsics::kStringBufferToString:
+      case Intrinsics::kStringBuilderAppend:
+      case Intrinsics::kStringBuilderToString:
+        return false;
+      default:
+        return HInvoke::CanBeNull();
+    }
+  }
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     // TODO: Add implicit null checks in intrinsics.
     return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
@@ -4254,7 +4321,7 @@
   HNewArray(HInstruction* length,
             HCurrentMethod* current_method,
             uint32_t dex_pc,
-            uint16_t type_index,
+            dex::TypeIndex type_index,
             const DexFile& dex_file,
             QuickEntrypointEnum entrypoint)
       : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
@@ -4265,7 +4332,7 @@
     SetRawInputAt(1, current_method);
   }
 
-  uint16_t GetTypeIndex() const { return type_index_; }
+  dex::TypeIndex GetTypeIndex() const { return type_index_; }
   const DexFile& GetDexFile() const { return dex_file_; }
 
   // Calls runtime so needs an environment.
@@ -4281,7 +4348,7 @@
   DECLARE_INSTRUCTION(NewArray);
 
  private:
-  const uint16_t type_index_;
+  const dex::TypeIndex type_index_;
   const DexFile& dex_file_;
   const QuickEntrypointEnum entrypoint_;
 
@@ -4818,7 +4885,7 @@
 class HParameterValue FINAL : public HExpression<0> {
  public:
   HParameterValue(const DexFile& dex_file,
-                  uint16_t type_index,
+                  dex::TypeIndex type_index,
                   uint8_t index,
                   Primitive::Type parameter_type,
                   bool is_this = false)
@@ -4831,7 +4898,7 @@
   }
 
   const DexFile& GetDexFile() const { return dex_file_; }
-  uint16_t GetTypeIndex() const { return type_index_; }
+  dex::TypeIndex GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
   bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
 
@@ -4849,7 +4916,7 @@
                 "Too many packed fields.");
 
   const DexFile& dex_file_;
-  const uint16_t type_index_;
+  const dex::TypeIndex type_index_;
   // The index of this parameter in the parameters list. Must be less
   // than HGraph::number_of_in_vregs_.
   const uint8_t index_;
@@ -5444,7 +5511,7 @@
   };
 
   HLoadClass(HCurrentMethod* current_method,
-             uint16_t type_index,
+             dex::TypeIndex type_index,
              const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc,
@@ -5476,7 +5543,7 @@
 
   void SetLoadKindWithTypeReference(LoadKind load_kind,
                                     const DexFile& dex_file,
-                                    uint32_t type_index) {
+                                    dex::TypeIndex type_index) {
     DCHECK(HasTypeReference(load_kind));
     DCHECK(IsSameDexFile(dex_file_, dex_file));
     DCHECK_EQ(type_index_, type_index);
@@ -5500,7 +5567,7 @@
 
   bool InstructionDataEquals(const HInstruction* other) const;
 
-  size_t ComputeHashCode() const OVERRIDE { return type_index_; }
+  size_t ComputeHashCode() const OVERRIDE { return type_index_.index_; }
 
   bool CanBeNull() const OVERRIDE { return false; }
 
@@ -5536,7 +5603,7 @@
     loaded_class_rti_ = rti;
   }
 
-  uint32_t GetTypeIndex() const { return type_index_; }
+  dex::TypeIndex GetTypeIndex() const { return type_index_; }
   const DexFile& GetDexFile() const { return dex_file_; }
 
   uint32_t GetDexCacheElementOffset() const;
@@ -5619,7 +5686,7 @@
   // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
 
-  const uint16_t type_index_;
+  const dex::TypeIndex type_index_;
   const DexFile& dex_file_;
 
   union {
@@ -5670,10 +5737,6 @@
     // GetIncludePatchInformation().
     kBootImageAddress,
 
-    // Load from the resolved strings array at an absolute address.
-    // Used for strings outside the boot image referenced by JIT-compiled code.
-    kDexCacheAddress,
-
     // Load from an entry in the .bss section using a PC-relative load.
     // Used for strings outside boot image when .bss is accessible with a PC-relative load.
     kBssEntry,
@@ -5683,11 +5746,14 @@
     // all other types are unavailable.
     kDexCacheViaMethod,
 
-    kLast = kDexCacheViaMethod
+    // Load from the root table associated with the JIT compiled method.
+    kJitTableAddress,
+
+    kLast = kJitTableAddress,
   };
 
   HLoadString(HCurrentMethod* current_method,
-              uint32_t string_index,
+              dex::StringIndex string_index,
               const DexFile& dex_file,
               uint32_t dex_pc)
       : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
@@ -5706,7 +5772,7 @@
 
   void SetLoadKindWithStringReference(LoadKind load_kind,
                                       const DexFile& dex_file,
-                                      uint32_t string_index) {
+                                      dex::StringIndex string_index) {
     DCHECK(HasStringReference(load_kind));
     load_data_.dex_file_ = &dex_file;
     string_index_ = string_index;
@@ -5719,7 +5785,7 @@
 
   const DexFile& GetDexFile() const;
 
-  uint32_t GetStringIndex() const {
+  dex::StringIndex GetStringIndex() const {
     DCHECK(HasStringReference(GetLoadKind()) || /* For slow paths. */ !IsInDexCache());
     return string_index_;
   }
@@ -5733,7 +5799,7 @@
 
   bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
 
-  size_t ComputeHashCode() const OVERRIDE { return string_index_; }
+  size_t ComputeHashCode() const OVERRIDE { return string_index_.index_; }
 
   // Will call the runtime if we need to load the string through
   // the dex cache and the string is not guaranteed to be there yet.
@@ -5741,7 +5807,8 @@
     LoadKind load_kind = GetLoadKind();
     if (load_kind == LoadKind::kBootImageLinkTimeAddress ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
-        load_kind == LoadKind::kBootImageAddress) {
+        load_kind == LoadKind::kBootImageAddress ||
+        load_kind == LoadKind::kJitTableAddress) {
       return false;
     }
     return !IsInDexCache();
@@ -5794,11 +5861,12 @@
     return load_kind == LoadKind::kBootImageLinkTimeAddress ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == LoadKind::kBssEntry ||
-        load_kind == LoadKind::kDexCacheViaMethod;
+        load_kind == LoadKind::kDexCacheViaMethod ||
+        load_kind == LoadKind::kJitTableAddress;
   }
 
   static bool HasAddress(LoadKind load_kind) {
-    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+    return load_kind == LoadKind::kBootImageAddress;
   }
 
   void SetLoadKindInternal(LoadKind load_kind);
@@ -5810,7 +5878,7 @@
 
   // String index serves also as the hash code and it's also needed for slow-paths,
   // so it must not be overwritten with other load data.
-  uint32_t string_index_;
+  dex::StringIndex string_index_;
 
   union {
     const DexFile* dex_file_;            // For string reference.
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index d4e2a58..5d9a652 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -35,7 +35,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
 
@@ -78,9 +78,9 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   HInstruction* parameter2 = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
   entry->AddInstruction(new (&allocator) HExit());
@@ -106,7 +106,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   ASSERT_FALSE(parameter->HasUses());
@@ -127,7 +127,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0);
   entry->AddInstruction(parameter1);
   entry->AddInstruction(with_environment);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8c76927..e15e33d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -61,6 +61,7 @@
 #include "debug/method_debug_info.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
+#include "dex_file_types.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
@@ -117,6 +118,7 @@
 
   size_t GetSize() const { return size_; }
   const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
+  uint8_t* GetData() { return memory_.data(); }
 
  private:
   ArenaVector<uint8_t> memory_;
@@ -167,15 +169,13 @@
       LOG(INFO) << "TIMINGS " << GetMethodName();
       LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
     }
-    if (visualizer_enabled_) {
-      MutexLock mu(Thread::Current(), visualizer_dump_mutex_);
-      *visualizer_output_ << visualizer_oss_.str();
-    }
+    DCHECK(visualizer_oss_.str().empty());
   }
 
-  void DumpDisassembly() const {
+  void DumpDisassembly() REQUIRES(!visualizer_dump_mutex_) {
     if (visualizer_enabled_) {
       visualizer_.DumpGraphWithDisassembly();
+      FlushVisualizer();
     }
   }
 
@@ -190,24 +190,34 @@
   }
 
  private:
-  void StartPass(const char* pass_name) {
+  void StartPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) {
     VLOG(compiler) << "Starting pass: " << pass_name;
     // Dump graph first, then start timer.
     if (visualizer_enabled_) {
       visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
+      FlushVisualizer();
     }
     if (timing_logger_enabled_) {
       timing_logger_.StartTiming(pass_name);
     }
   }
 
-  void EndPass(const char* pass_name) {
+  void FlushVisualizer() REQUIRES(!visualizer_dump_mutex_) {
+    MutexLock mu(Thread::Current(), visualizer_dump_mutex_);
+    *visualizer_output_ << visualizer_oss_.str();
+    visualizer_output_->flush();
+    visualizer_oss_.str("");
+    visualizer_oss_.clear();
+  }
+
+  void EndPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) {
     // Pause timer first, then dump graph.
     if (timing_logger_enabled_) {
       timing_logger_.EndTiming();
     }
     if (visualizer_enabled_) {
       visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_);
+      FlushVisualizer();
     }
 
     // Validate the HGraph if running in debug mode.
@@ -616,17 +626,14 @@
   UNUSED(codegen);  // To avoid compilation error when compiling for svelte
   OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
-#ifdef ART_USE_VIXL_ARM_BACKEND
-  UNUSED(arena);
-  UNUSED(pass_observer);
-  UNUSED(stats);
-#endif
   switch (instruction_set) {
-#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
+#if defined(ART_ENABLE_CODEGEN_arm)
     case kThumb2:
     case kArm: {
+#ifndef ART_USE_VIXL_ARM_BACKEND
       arm::DexCacheArrayFixups* fixups =
           new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
+#endif
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
@@ -635,7 +642,9 @@
         simplifier,
         side_effects,
         gvn,
+#ifndef ART_USE_VIXL_ARM_BACKEND
         fixups
+#endif
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
@@ -746,8 +755,10 @@
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$initial");
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, "dead_code_elimination$after_inlining");
+  HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
-  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
@@ -786,6 +797,7 @@
     select_generator,
     fold2,  // TODO: if we don't inline we can also skip fold2.
     simplify2,
+    dce2,
     side_effects,
     gvn,
     licm,
@@ -795,7 +807,7 @@
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
     lse,
-    dce2,
+    dce3,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
@@ -937,7 +949,7 @@
     graph->SetArtMethod(method);
     ScopedObjectAccess soa(Thread::Current());
     interpreter_metadata = method->GetQuickenedInfo(class_linker->GetImagePointerSize());
-    uint16_t type_index = method->GetDeclaringClass()->GetDexTypeIndex();
+    dex::TypeIndex type_index = method->GetDeclaringClass()->GetDexTypeIndex();
 
     // Update the dex cache if the type is not in it yet. Note that under AOT,
     // the verifier must have set it, but under JIT, there's no guarantee, as we
@@ -1114,7 +1126,7 @@
                                     jit::JitCodeCache* code_cache,
                                     ArtMethod* method,
                                     bool osr) {
-  StackHandleScope<2> hs(self);
+  StackHandleScope<3> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
@@ -1160,25 +1172,48 @@
   }
 
   size_t stack_map_size = codegen->ComputeStackMapsSize();
-  uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size, method);
-  if (stack_map_data == nullptr) {
+  size_t number_of_roots = codegen->GetNumberOfJitRoots();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  // We allocate an object array to ensure that the JIT roots we will collect in EmitJitRoots
+  // are visible to the GC between EmitLiterals and CommitCode. Once CommitCode has
+  // executed, this array is no longer needed.
+  Handle<mirror::ObjectArray<mirror::Object>> roots(
+      hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc(
+          self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots)));
+  if (roots.Get() == nullptr) {
+    // Out of memory: just clear the exception to avoid leaving a Java exception uncaught.
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    return false;
+  }
+  uint8_t* stack_map_data = nullptr;
+  uint8_t* roots_data = nullptr;
+  code_cache->ReserveData(
+      self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data);
+  if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
+  codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data, dex_cache);
+
   const void* code = code_cache->CommitCode(
       self,
       method,
       stack_map_data,
+      roots_data,
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
       code_allocator.GetSize(),
-      osr);
+      osr,
+      roots,
+      codegen->GetGraph()->HasShouldDeoptimizeFlag(),
+      codegen->GetGraph()->GetCHASingleImplementationList());
 
   if (code == nullptr) {
-    code_cache->ClearData(self, stack_map_data);
+    code_cache->ClearData(self, stack_map_data, roots_data);
     return false;
   }
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index c8d1ce0..203b1ec 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -27,6 +27,7 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
+  kCHAInline,
   kCompiled,
   kInlinedInvoke,
   kReplacedInvokeWithSimplePattern,
@@ -106,6 +107,7 @@
     std::string name;
     switch (stat) {
       case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCHAInline : name = "CHAInline"; break;
       case kCompiled : name = "Compiled"; break;
       case kInlinedInvoke : name = "InlinedInvoke"; break;
       case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break;
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 0db6088..f9ac3a0 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -143,7 +143,7 @@
   // - or the load class has only one use.
   if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
     instruction->SetEntrypoint(kQuickAllocObject);
-    instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
+    instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex().index_), 0);
     if (has_only_one_use) {
       // We've just removed the only use of the HLoadClass. Since we don't run DCE after this pass,
       // do it manually if possible.
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index d588dea..c191c66 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -96,7 +96,7 @@
   void VisitBoundType(HBoundType* instr) OVERRIDE;
   void VisitNullCheck(HNullCheck* instr) OVERRIDE;
   void UpdateReferenceTypeInfo(HInstruction* instr,
-                               uint16_t type_idx,
+                               dex::TypeIndex type_idx,
                                const DexFile& dex_file,
                                bool is_exact);
 
@@ -463,7 +463,7 @@
 }
 
 void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
-                                                                   uint16_t type_idx,
+                                                                   dex::TypeIndex type_idx,
                                                                    const DexFile& dex_file,
                                                                    bool is_exact) {
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
@@ -484,7 +484,7 @@
 
 static mirror::Class* GetClassFromDexCache(Thread* self,
                                            const DexFile& dex_file,
-                                           uint16_t type_idx,
+                                           dex::TypeIndex type_idx,
                                            Handle<mirror::DexCache> hint_dex_cache)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   mirror::DexCache* dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache);
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index caf6647..59523a9 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -87,6 +87,10 @@
       // Adjust the stack slot, now that we know the number of them for each type.
       // The way this implementation lays out the stack is the following:
       // [parameter slots       ]
+      // [art method (caller)   ]
+      // [entry spill (core)    ]
+      // [entry spill (float)   ]
+      // [should_deoptimize flag] (this is optional)
       // [catch phi spill slots ]
       // [double spill slots    ]
       // [long spill slots      ]
@@ -374,7 +378,9 @@
       if (current->GetType() == Primitive::kPrimNot) {
         DCHECK(interval->GetDefinedBy()->IsActualObject())
             << interval->GetDefinedBy()->DebugName()
-            << "@" << safepoint_position->GetInstruction()->DebugName();
+            << '(' << interval->GetDefinedBy()->GetId() << ')'
+            << "@" << safepoint_position->GetInstruction()->DebugName()
+            << '(' << safepoint_position->GetInstruction()->GetId() << ')';
         LocationSummary* locations = safepoint_position->GetLocations();
         if (current->GetParent()->HasSpillSlot()) {
           locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 55ea99e..559f409 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -20,6 +20,7 @@
 #include "code_generator.h"
 #include "code_generator_x86.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "dex_instruction.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
@@ -495,7 +496,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (allocator) HBasicBlock(graph);
@@ -658,7 +659,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (allocator) HBasicBlock(graph);
@@ -742,7 +743,7 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   entry->AddInstruction(parameter);
 
   HInstruction* constant1 = graph->GetIntConstant(1);
@@ -821,9 +822,9 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* first = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   HInstruction* second = new (allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   entry->AddInstruction(first);
   entry->AddInstruction(second);
 
@@ -883,13 +884,13 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* one = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   HInstruction* two = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   HInstruction* three = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   HInstruction* four = new (&allocator) HParameterValue(
-      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+      graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
   entry->AddInstruction(one);
   entry->AddInstruction(two);
   entry->AddInstruction(three);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index fd1db59..daf160a 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -147,7 +147,7 @@
   DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
 
   const DexFile& dex_file = load_class->GetDexFile();
-  uint32_t type_index = load_class->GetTypeIndex();
+  dex::TypeIndex type_index = load_class->GetTypeIndex();
 
   bool is_in_dex_cache = false;
   bool is_in_boot_image = false;
@@ -197,7 +197,7 @@
           // inlined frames are used correctly for OOM stack trace.
           // TODO: Write a test for this. Bug: 29416588
           desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
-          void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index];
+          void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index.index_];
           address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
         }
         // AOT app compilation. Check if the class is in the boot image.
@@ -267,7 +267,7 @@
   DCHECK(!load_string->IsInDexCache());
 
   const DexFile& dex_file = load_string->GetDexFile();
-  uint32_t string_index = load_string->GetStringIndex();
+  dex::StringIndex string_index = load_string->GetStringIndex();
 
   HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
   uint64_t address = 0u;  // String or dex cache element address.
@@ -281,7 +281,8 @@
         : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
 
     if (codegen_->GetCompilerOptions().IsBootImage()) {
-      // Compiling boot image. Resolve the string and allocate it if needed.
+      // Compiling boot image. Resolve the string and allocate it if needed, to ensure
+      // the string will be added to the boot image.
       DCHECK(!runtime->UseJitCompilation());
       mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
       CHECK(string != nullptr);
@@ -297,10 +298,14 @@
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      mirror::String* string = dex_cache->GetResolvedString(string_index);
-      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
-        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
-        address = reinterpret_cast64<uint64_t>(string);
+      mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      if (string != nullptr) {
+        if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+          desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+          address = reinterpret_cast64<uint64_t>(string);
+        } else {
+          desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
+        }
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
@@ -322,10 +327,10 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
     case HLoadString::LoadKind::kBssEntry:
     case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kJitTableAddress:
       load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheAddress:
       DCHECK_NE(address, 0u);
       load_string->SetLoadKindWithAddress(load_kind, address);
       break;
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 8045bd2..c35c393 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -43,12 +43,12 @@
 }
 
 const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
-  return vixl_masm_.GetStartAddress<uint8_t*>();
+  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
 }
 
 void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
   // Copy the instructions from the buffer.
-  MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize());
+  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
   region.CopyFrom(0, from);
 }
 
@@ -62,6 +62,12 @@
   ___ Rsb(reg, reg, 0);
 }
 
+void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
+  if (kPoisonHeapReferences) {  // Emits nothing when heap poisoning is disabled.
+    PoisonHeapReference(reg);  // Poisons the reference held in `reg`.
+  }
+}
+
 void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
   if (kPoisonHeapReferences) {
     UnpoisonHeapReference(reg);
@@ -359,7 +365,7 @@
       if (stack_offset != 0) {
         base = temps.Acquire();
         DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
-        ___ Add(base, sp, stack_offset);
+        ___ Add(base, sp, Operand::From(stack_offset));
       }
       ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
     } else {
@@ -379,7 +385,7 @@
       vixl32::Register base = sp;
       if (stack_offset != 0) {
         base = temps.Acquire();
-        ___ Add(base, sp, stack_offset);
+        ___ Add(base, sp, Operand::From(stack_offset));
       }
       ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
     } else {
@@ -423,5 +429,31 @@
   }
 }
 
+void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
+                                                   vixl32::Label* label,
+                                                   bool is_far_target) {
+  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
+    // In T32, Cbz/Cbnz instructions have the following limitations:
+    // - There are only 7 bits (i:imm5:0) to encode branch target address (cannot be far target).
+    // - Only low registers (i.e. R0 .. R7) can be encoded.
+    // - Only forward branches (unbound labels) are supported.
+    Cbz(rn, label);
+    return;
+  }
+  Cmp(rn, 0);  // Fallback when any Cbz condition above fails: compare `rn` with zero...
+  B(eq, label);  // ...and branch to `label` if it was equal.
+}
+
+void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
+                                                      vixl32::Label* label,
+                                                      bool is_far_target) {
+  if (!is_far_target && rn.IsLow() && !label->IsBound()) {  // Near, low reg, unbound label.
+    Cbnz(rn, label);
+    return;
+  }
+  Cmp(rn, 0);  // Fallback when Cbnz is not used: compare `rn` with zero...
+  B(ne, label);  // ...and branch to `label` if it was not equal.
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 68fd32e..b4a4abc 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -37,6 +37,25 @@
 namespace art {
 namespace arm {
 
+class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler {
+ public:
+  // The following helpers branch on rn == 0 / rn != 0, emitting CMP+Bcc or Cbz/Cbnz.
+  // CMP+Bcc is generated by default (is_far_target defaults to true).
+  // If the caller passes the hint is_far_target = false and both rn and label fit Cbz/Cbnz,
+  // then Cbz/Cbnz is generated instead.
+  // Prefer these helpers to using vixl32::MacroAssembler::Cbz/Cbnz directly.
+  // In T32, Cbz/Cbnz instructions have the following limitations:
+  // - Far targets, which are over 126 bytes away, are not supported.
+  // - Only low registers can be encoded.
+  // - Backward branches are not supported.
+  void CompareAndBranchIfZero(vixl32::Register rn,
+                              vixl32::Label* label,
+                              bool is_far_target = true);
+  void CompareAndBranchIfNonZero(vixl32::Register rn,
+                                 vixl32::Label* label,
+                                 bool is_far_target = true);
+};
+
 class ArmVIXLAssembler FINAL : public Assembler {
  private:
   class ArmException;
@@ -48,7 +67,7 @@
   }
 
   virtual ~ArmVIXLAssembler() {}
-  vixl32::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
+  ArmVIXLMacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
   void FinalizeCode() OVERRIDE;
 
   // Size of generated code.
@@ -72,6 +91,8 @@
   void PoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(vixl32::Register reg);
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl32::Register reg);
 
@@ -115,7 +136,7 @@
 
  private:
   // VIXL assembler.
-  vixl32::MacroAssembler vixl_masm_;
+  ArmVIXLMacroAssembler vixl_masm_;
 };
 
 // Thread register declaration.
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc
index cf7a4d1..3f425df 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm.cc
@@ -594,6 +594,41 @@
   __ b(slow->Entry(), NE);
 }
 
+std::unique_ptr<JNIMacroLabel> ArmJNIMacroAssembler::CreateLabel() {
+  return std::unique_ptr<JNIMacroLabel>(new ArmJNIMacroLabel());  // Caller owns the new label.
+}
+
+void ArmJNIMacroAssembler::Jump(JNIMacroLabel* label) {
+  CHECK(label != nullptr);  // A null label is a fatal error.
+  __ b(ArmJNIMacroLabel::Cast(label)->AsArm());  // Unconditional branch to the ARM label.
+}
+
+void ArmJNIMacroAssembler::Jump(JNIMacroLabel* label,
+                                JNIMacroUnaryCondition condition,
+                                ManagedRegister test) {
+  CHECK(label != nullptr);  // A null label is a fatal error.
+
+  arm::Condition arm_cond;  // Branch condition applied after comparing `test` with zero.
+  switch (condition) {
+    case JNIMacroUnaryCondition::kZero:
+      arm_cond = EQ;  // Branch when test == 0.
+      break;
+    case JNIMacroUnaryCondition::kNotZero:
+      arm_cond = NE;  // Branch when test != 0.
+      break;
+    default:
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      UNREACHABLE();
+  }
+  __ cmp(test.AsArm().AsCoreRegister(), ShifterOperand(0));  // Compare `test` against 0.
+  __ b(ArmJNIMacroLabel::Cast(label)->AsArm(), arm_cond);  // Conditional branch to the label.
+}
+
+void ArmJNIMacroAssembler::Bind(JNIMacroLabel* label) {
+  CHECK(label != nullptr);  // A null label is a fatal error.
+  __ Bind(ArmJNIMacroLabel::Cast(label)->AsArm());  // Bind the underlying ARM label.
+}
+
 #undef __
 
 void ArmExceptionSlowPath::Emit(Assembler* sasm) {
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.h b/compiler/utils/arm/jni_macro_assembler_arm.h
index 4471906..809ac8b 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm.h
@@ -25,6 +25,7 @@
 #include "base/enums.h"
 #include "base/macros.h"
 #include "utils/jni_macro_assembler.h"
+#include "utils/label.h"
 #include "offsets.h"
 
 namespace art {
@@ -159,10 +160,26 @@
 
   void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
 
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE;
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label) OVERRIDE;
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE;
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label) OVERRIDE;
+
  private:
   std::unique_ptr<ArmAssembler> asm_;
 };
 
+class ArmJNIMacroLabel FINAL : public JNIMacroLabelCommon<ArmJNIMacroLabel, art::Label, kArm> {
+ public:
+  art::Label* AsArm() {  // Exposes the platform label as an art::Label*.
+    return AsPlatformLabel();
+  }
+};
+
 }  // namespace arm
 }  // namespace art
 
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 8a9fd90..fb6f172 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -49,7 +49,7 @@
   return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode()));
 }
 
-static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);;
+static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
 
 void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
                                           ManagedRegister method_reg,
@@ -168,6 +168,8 @@
     CHECK_EQ(0u, size);
   } else if (src.IsCoreRegister()) {
     CHECK_EQ(4u, size);
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    temps.Exclude(src.AsVIXLRegister());
     asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value());
   } else if (src.IsRegisterPair()) {
     CHECK_EQ(8u, size);
@@ -186,12 +188,16 @@
 void ArmVIXLJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
   ArmManagedRegister src = msrc.AsArm();
   CHECK(src.IsCoreRegister()) << src;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(src.AsVIXLRegister());
   asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value());
 }
 
 void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
   ArmManagedRegister src = msrc.AsArm();
   CHECK(src.IsCoreRegister()) << src;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(src.AsVIXLRegister());
   asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value());
 }
 
@@ -202,6 +208,8 @@
   ArmManagedRegister src = msrc.AsArm();
   ArmManagedRegister scratch = mscratch.AsArm();
   asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value());
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, in_off.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, dest.Int32Value() + 4);
 }
@@ -210,6 +218,8 @@
                                        FrameOffset src,
                                        ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, src.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, dest.Int32Value());
 }
@@ -220,6 +230,8 @@
                                        bool unpoison_reference) {
   ArmManagedRegister dst = dest.AsArm();
   CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(dst.AsVIXLRegister(), base.AsArm().AsVIXLRegister());
   asm_.LoadFromOffset(kLoadWord,
                       dst.AsVIXLRegister(),
                       base.AsArm().AsVIXLRegister(),
@@ -246,6 +258,8 @@
                                                      ManagedRegister scratch) {
   ArmManagedRegister mscratch = scratch.AsArm();
   CHECK(mscratch.IsCoreRegister()) << mscratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(mscratch.AsVIXLRegister());
   asm_.LoadImmediate(mscratch.AsVIXLRegister(), imm);
   asm_.StoreToOffset(kStoreWord, mscratch.AsVIXLRegister(), sp, dest.Int32Value());
 }
@@ -263,6 +277,8 @@
 void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) {
   ArmManagedRegister dst = m_dst.AsArm();
   CHECK(dst.IsCoreRegister()) << dst;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(dst.AsVIXLRegister());
   asm_.LoadFromOffset(kLoadWord, dst.AsVIXLRegister(), tr, offs.Int32Value());
 }
 
@@ -271,6 +287,8 @@
                                                     ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), tr, thr_offs.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, fr_offs.Int32Value());
 }
@@ -286,6 +304,8 @@
                                                         ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   asm_.AddConstant(scratch.AsVIXLRegister(), sp, fr_offs.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), tr, thr_offs.Int32Value());
 }
@@ -312,6 +332,8 @@
   if (!dst.Equals(src)) {
     if (dst.IsCoreRegister()) {
       CHECK(src.IsCoreRegister()) << src;
+      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+      temps.Exclude(dst.AsVIXLRegister());
       ___ Mov(dst.AsVIXLRegister(), src.AsVIXLRegister());
     } else if (dst.IsDRegister()) {
       if (src.IsDRegister()) {
@@ -351,6 +373,8 @@
   ArmManagedRegister temp = scratch.AsArm();
   CHECK(temp.IsCoreRegister()) << temp;
   CHECK(size == 4 || size == 8) << size;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(temp.AsVIXLRegister());
   if (size == 4) {
     asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value());
     asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value());
@@ -414,6 +438,8 @@
   ArmManagedRegister in_reg = min_reg.AsArm();
   CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
   CHECK(out_reg.IsCoreRegister()) << out_reg;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(out_reg.AsVIXLRegister());
   if (null_allowed) {
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
@@ -425,6 +451,8 @@
                           handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
+
+    temps.Exclude(in_reg.AsVIXLRegister());
     ___ Cmp(in_reg.AsVIXLRegister(), 0);
 
     if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) {
@@ -457,6 +485,8 @@
                                                       bool null_allowed) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   if (null_allowed) {
     asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
@@ -503,6 +533,8 @@
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(base.IsCoreRegister()) << base;
   CHECK(scratch.IsCoreRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   asm_.LoadFromOffset(kLoadWord,
                       scratch.AsVIXLRegister(),
                       base.AsVIXLRegister(),
@@ -514,6 +546,8 @@
 void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   // Call *(*(SP + base) + offset)
   asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, base.Int32Value());
   asm_.LoadFromOffset(kLoadWord,
@@ -530,6 +564,8 @@
 }
 
 void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister mtr) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(mtr.AsArm().AsVIXLRegister());
   ___ Mov(mtr.AsArm().AsVIXLRegister(), tr);
 }
 
@@ -541,6 +577,8 @@
 void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
   CHECK_ALIGNED(stack_adjust, kStackAlignment);
   ArmManagedRegister scratch = m_scratch.AsArm();
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch.AsVIXLRegister());
   exception_blocks_.emplace_back(
       new ArmVIXLJNIMacroAssembler::ArmException(scratch, stack_adjust));
   asm_.LoadFromOffset(kLoadWord,
@@ -572,12 +610,16 @@
                                     ManagedRegister test) {
   CHECK(label != nullptr);
 
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(test.AsArm().AsVIXLRegister());
   switch (condition) {
     case JNIMacroUnaryCondition::kZero:
-      ___ Cbz(test.AsArm().AsVIXLRegister(), ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfZero(test.AsArm().AsVIXLRegister(),
+                                 ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ Cbnz(test.AsArm().AsVIXLRegister(), ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfNonZero(test.AsArm().AsVIXLRegister(),
+                                    ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     default:
       LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
@@ -596,11 +638,14 @@
   if (exception->stack_adjust_ != 0) {  // Fix up the frame.
     DecreaseFrameSize(exception->stack_adjust_);
   }
+
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(exception->scratch_.AsVIXLRegister());
   // Pass exception object as argument.
   // Don't care about preserving r0 as this won't return.
   ___ Mov(r0, exception->scratch_.AsVIXLRegister());
+  temps.Include(exception->scratch_.AsVIXLRegister());
   // TODO: check that exception->scratch_ is dead by this point.
-  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
   ___ Ldr(temp,
           MemOperand(tr,
@@ -622,6 +667,9 @@
   } else if (dest.IsCoreRegister()) {
     CHECK(!dest.AsVIXLRegister().Is(sp)) << dest;
 
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    temps.Exclude(dest.AsVIXLRegister());
+
     if (size == 1u) {
       ___ Ldrb(dest.AsVIXLRegister(), MemOperand(base, offset));
     } else {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index f91bcfa..6ed0e9b 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -40,12 +40,12 @@
 }
 
 const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const {
-  return vixl_masm_.GetStartAddress<uint8_t*>();
+  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
 }
 
 void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) {
   // Copy the instructions from the buffer.
-  MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize());
+  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
   region.CopyFrom(0, from);
 }
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 9c65280..b34e125 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -51,30 +51,30 @@
 
   typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
 
-  void DriverFn(TestFn f, std::string test_name) {
+  void DriverFn(TestFn f, const std::string& test_name) {
     DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
-  void DriverStr(std::string assembly_string, std::string test_name) {
+  void DriverStr(const std::string& assembly_string, const std::string& test_name) {
     DriverWrapper(assembly_string, test_name);
   }
 
-  std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
+  std::string RepeatR(void (Ass::*f)(Reg), const std::string& fmt) {
     return RepeatTemplatedRegister<Reg>(f,
         GetRegisters(),
         &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
         fmt);
   }
 
-  std::string Repeatr(void (Ass::*f)(Reg), std::string fmt) {
+  std::string Repeatr(void (Ass::*f)(Reg), const std::string& fmt) {
     return RepeatTemplatedRegister<Reg>(f,
         GetRegisters(),
         &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
         fmt);
   }
 
-  std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRR(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -83,7 +83,7 @@
         fmt);
   }
 
-  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegistersNoDupes<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -92,7 +92,7 @@
         fmt);
   }
 
-  std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string Repeatrr(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -101,7 +101,7 @@
         fmt);
   }
 
-  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), std::string fmt) {
+  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -112,7 +112,7 @@
         fmt);
   }
 
-  std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string Repeatrb(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -121,7 +121,7 @@
         fmt);
   }
 
-  std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRr(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -130,11 +130,11 @@
         fmt);
   }
 
-  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
     return RepeatRegisterImm<RegisterView::kUsePrimaryName>(f, imm_bytes, fmt);
   }
 
-  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
     return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
   }
 
@@ -145,7 +145,7 @@
                                               const std::vector<Reg2*> reg2_registers,
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -195,7 +195,7 @@
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               int imm_bits,
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
@@ -245,7 +245,7 @@
                                              int imm_bits,
                                              const std::vector<Reg*> registers,
                                              std::string (AssemblerTest::*GetName)(const RegType&),
-                                             std::string fmt) {
+                                             const std::string& fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -281,7 +281,7 @@
   }
 
   template <typename ImmType>
-  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f,
         imm_bits,
         GetRegisters(),
@@ -292,7 +292,7 @@
   }
 
   template <typename ImmType>
-  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, const std::string& fmt) {
     return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f,
         imm_bits,
         GetRegisters(),
@@ -301,7 +301,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f,
         imm_bits,
         GetFPRegisters(),
@@ -311,7 +313,7 @@
         fmt);
   }
 
-  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
+  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg>(f,
                                                   GetFPRegisters(),
                                                   GetFPRegisters(),
@@ -320,7 +322,7 @@
                                                   fmt);
   }
 
-  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), std::string fmt) {
+  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg, FPReg>(f,
                                                          GetFPRegisters(),
                                                          GetFPRegisters(),
@@ -331,7 +333,7 @@
                                                          fmt);
   }
 
-  std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), std::string fmt) {
+  std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg, Reg>(
         f,
         GetFPRegisters(),
@@ -345,7 +347,7 @@
 
   std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&),
                         size_t imm_bytes,
-                        std::string fmt) {
+                        const std::string& fmt) {
     return RepeatTemplatedRegistersImm<FPReg, FPReg>(f,
                                                      GetFPRegisters(),
                                                      GetFPRegisters(),
@@ -356,7 +358,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f,
                                                                   imm_bits,
                                                                   GetFPRegisters(),
@@ -367,7 +371,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
+  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
                                                                   GetFPRegisters(),
                                                                   GetFPRegisters(),
@@ -377,7 +383,7 @@
                                                                   fmt);
   }
 
-  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
         GetRegisters(),
@@ -386,7 +392,7 @@
         fmt);
   }
 
-  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
         GetRegisters(),
@@ -395,7 +401,7 @@
         fmt);
   }
 
-  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, FPReg>(f,
         GetRegisters(),
         GetFPRegisters(),
@@ -404,7 +410,7 @@
         fmt);
   }
 
-  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, FPReg>(f,
         GetRegisters(),
         GetFPRegisters(),
@@ -413,7 +419,9 @@
         fmt);
   }
 
-  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt,
+  std::string RepeatI(void (Ass::*f)(const Imm&),
+                      size_t imm_bytes,
+                      const std::string& fmt,
                       bool as_uint = false) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes, as_uint);
@@ -651,7 +659,7 @@
   std::string RepeatTemplatedRegister(void (Ass::*f)(RegType),
                                       const std::vector<RegType*> registers,
                                       std::string (AssemblerTest::*GetName)(const RegType&),
-                                      std::string fmt) {
+                                      const std::string& fmt) {
     std::string str;
     for (auto reg : registers) {
       (assembler_.get()->*f)(*reg);
@@ -679,7 +687,7 @@
                                        const std::vector<Reg2*> reg2_registers,
                                        std::string (AssemblerTest::*GetName1)(const Reg1&),
                                        std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                       std::string fmt) {
+                                       const std::string& fmt) {
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
 
     std::string str;
@@ -717,7 +725,7 @@
                                               const std::vector<Reg2*> reg2_registers,
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
 
     std::string str;
@@ -758,7 +766,7 @@
                                        std::string (AssemblerTest::*GetName1)(const Reg1&),
                                        std::string (AssemblerTest::*GetName2)(const Reg2&),
                                        std::string (AssemblerTest::*GetName3)(const Reg3&),
-                                       std::string fmt) {
+                                       const std::string& fmt) {
     std::string str;
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
@@ -803,7 +811,7 @@
                                           std::string (AssemblerTest::*GetName1)(const Reg1&),
                                           std::string (AssemblerTest::*GetName2)(const Reg2&),
                                           size_t imm_bytes,
-                                          std::string fmt) {
+                                          const std::string& fmt) {
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
 
@@ -895,8 +903,9 @@
 
  private:
   template <RegisterView kRegView>
-  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes,
-                                  std::string fmt) {
+  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&),
+                                size_t imm_bytes,
+                                const std::string& fmt) {
     const std::vector<Reg*> registers = GetRegisters();
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
@@ -938,7 +947,7 @@
   virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
   }
 
-  void DriverWrapper(std::string assembly_text, std::string test_name) {
+  void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 8c71292..ac24ee9 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -106,7 +106,9 @@
   // Driver() assembles and compares the results. If the results are not equal and we have a
   // disassembler, disassemble both and check whether they have the same mnemonics (in which case
   // we just warn).
-  void Driver(const std::vector<uint8_t>& data, std::string assembly_text, std::string test_name) {
+  void Driver(const std::vector<uint8_t>& data,
+              const std::string& assembly_text,
+              const std::string& test_name) {
     EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
 
     NativeAssemblerResult res;
@@ -229,7 +231,7 @@
     bool success = Exec(args, error_msg);
     if (!success) {
       LOG(ERROR) << "Assembler command line:";
-      for (std::string arg : args) {
+      for (const std::string& arg : args) {
         LOG(ERROR) << arg;
       }
     }
@@ -238,7 +240,7 @@
 
   // Runs objdump -h on the binary file and extracts the first line with .text.
   // Returns "" on failure.
-  std::string Objdump(std::string file) {
+  std::string Objdump(const std::string& file) {
     bool have_objdump = FileExists(FindTool(objdump_cmd_name_));
     EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
     if (!have_objdump) {
@@ -287,8 +289,9 @@
   }
 
   // Disassemble both binaries and compare the text.
-  bool DisassembleBinaries(const std::vector<uint8_t>& data, const std::vector<uint8_t>& as,
-                           std::string test_name) {
+  bool DisassembleBinaries(const std::vector<uint8_t>& data,
+                           const std::vector<uint8_t>& as,
+                           const std::string& test_name) {
     std::string disassembler = GetDisassembleCommand();
     if (disassembler.length() == 0) {
       LOG(WARNING) << "No dissassembler command.";
@@ -324,7 +327,7 @@
     return result;
   }
 
-  bool DisassembleBinary(std::string file, std::string* error_msg) {
+  bool DisassembleBinary(const std::string& file, std::string* error_msg) {
     std::vector<std::string> args;
 
     // Encaspulate the whole command line in a single string passed to
@@ -345,7 +348,7 @@
     return Exec(args, error_msg);
   }
 
-  std::string WriteToFile(const std::vector<uint8_t>& buffer, std::string test_name) {
+  std::string WriteToFile(const std::vector<uint8_t>& buffer, const std::string& test_name) {
     std::string file_name = GetTmpnam() + std::string("---") + test_name;
     const char* data = reinterpret_cast<const char*>(buffer.data());
     std::ofstream s_out(file_name + ".o");
@@ -354,7 +357,7 @@
     return file_name + ".o";
   }
 
-  bool CompareFiles(std::string f1, std::string f2) {
+  bool CompareFiles(const std::string& f1, const std::string& f2) {
     std::ifstream f1_in(f1);
     std::ifstream f2_in(f2);
 
@@ -369,7 +372,9 @@
   }
 
   // Compile the given assembly code and extract the binary, if possible. Put result into res.
-  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
+  bool Compile(const std::string& assembly_code,
+               NativeAssemblerResult* res,
+               const std::string& test_name) {
     res->ok = false;
     res->code.reset(nullptr);
 
@@ -438,7 +443,7 @@
   // Check whether file exists. Is used for commands, so strips off any parameters: anything after
   // the first space. We skip to the last slash for this, so it should work with directories with
   // spaces.
-  static bool FileExists(std::string file) {
+  static bool FileExists(const std::string& file) {
     if (file.length() == 0) {
       return false;
     }
@@ -478,7 +483,7 @@
     return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
   }
 
-  std::string FindTool(std::string tool_name) {
+  std::string FindTool(const std::string& tool_name) {
     // Find the current tool. Wild-card pattern is "arch-string*tool-name".
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
@@ -522,7 +527,8 @@
 
   // Helper for below. If name_predicate is empty, search for all files, otherwise use it for the
   // "-name" option.
-  static void FindToolDumpPrintout(std::string name_predicate, std::string tmp_file) {
+  static void FindToolDumpPrintout(const std::string& name_predicate,
+                                   const std::string& tmp_file) {
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
     args.push_back("find");
@@ -562,7 +568,7 @@
   }
 
   // For debug purposes.
-  void FindToolDump(std::string tool_name) {
+  void FindToolDump(const std::string& tool_name) {
     // Check with the tool name.
     FindToolDumpPrintout(architecture_string_ + "*" + tool_name, GetTmpnam());
     FindToolDumpPrintout("", GetTmpnam());
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 10bed13..50a1d9f 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1753,7 +1753,10 @@
   __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400);
   __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400);
 
+  vixl::aarch32::UseScratchRegisterScope temps(assembler.asm_.GetVIXLAssembler());
+  temps.Exclude(R12);
   __ LoadFromOffset(kLoadWord, R0, R12, 12);  // 32-bit because of R12.
+  temps.Include(R12);
   __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000);
 
   __ LoadFromOffset(kLoadSignedByte, R2, R4, 12);
@@ -1783,7 +1786,10 @@
   __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400);
   __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400);
 
+  vixl::aarch32::UseScratchRegisterScope temps(assembler.asm_.GetVIXLAssembler());
+  temps.Exclude(R12);
   __ StoreToOffset(kStoreWord, R0, R12, 12);  // 32-bit because of R12.
+  temps.Include(R12);
   __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000);
 
   __ StoreToOffset(kStoreByte, R2, R4, 12);
diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h
new file mode 100644
index 0000000..70ea028
--- /dev/null
+++ b/compiler/utils/atomic_method_ref_map-inl.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_
+#define ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_
+
+#include "atomic_method_ref_map.h"
+
+#include "dex_file-inl.h"
+
+namespace art {
+
+template <typename T>
+inline typename AtomicMethodRefMap<T>::InsertResult AtomicMethodRefMap<T>::Insert(
+    MethodReference ref,
+    const T& expected,
+    const T& desired) {
+  // Only dex files registered through AddDexFile() have a slot array to CAS into.
+  ElementArray* const slots = GetArray(ref.dex_file);
+  if (slots == nullptr) {
+    return kInsertResultInvalidDexFile;
+  }
+  auto& slot = (*slots)[ref.dex_method_index];
+  const bool swapped = slot.CompareExchangeStrongSequentiallyConsistent(expected, desired);
+  return swapped ? kInsertResultSuccess : kInsertResultCASFailure;
+}
+
+template <typename T>
+inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const {
+  const ElementArray* const array = GetArray(ref.dex_file);
+  if (array == nullptr) {
+    return false;  // Unregistered dex file; `*out` is left untouched.
+  }
+  *out = (*array)[ref.dex_method_index].LoadRelaxed();
+  return true;
+}
+
+template <typename T>
+inline void AtomicMethodRefMap<T>::AddDexFile(const DexFile* dex_file) {
+  arrays_.Put(dex_file, ElementArray(dex_file->NumMethodIds()));  // One slot per method id.
+}
+
+template <typename T>
+inline typename AtomicMethodRefMap<T>::ElementArray* AtomicMethodRefMap<T>::GetArray(
+    const DexFile* dex_file) {
+  auto entry = arrays_.find(dex_file);  // Mutable lookup; nullptr when the file is unknown.
+  return (entry == arrays_.end()) ? nullptr : &entry->second;
+}
+
+template <typename T>
+inline const typename AtomicMethodRefMap<T>::ElementArray* AtomicMethodRefMap<T>::GetArray(
+    const DexFile* dex_file) const {
+  auto entry = arrays_.find(dex_file);  // Read-only lookup; nullptr when the file is unknown.
+  return (entry == arrays_.end()) ? nullptr : &entry->second;
+}
+
+template <typename T> template <typename Visitor>
+inline void AtomicMethodRefMap<T>::Visit(const Visitor& visitor) {
+  // Report the current value of every method slot of every registered dex file.
+  for (auto& entry : arrays_) {
+    const ElementArray& slots = entry.second;
+    for (size_t index = 0; index < slots.size(); ++index) {
+      visitor(MethodReference(entry.first, index), slots[index].LoadRelaxed());
+    }
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_
diff --git a/compiler/utils/atomic_method_ref_map.h b/compiler/utils/atomic_method_ref_map.h
new file mode 100644
index 0000000..11ab211
--- /dev/null
+++ b/compiler/utils/atomic_method_ref_map.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_
+#define ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_
+
+#include "base/dchecked_vector.h"
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+class DexFile;
+
+// Used by CompilerCallbacks to track verification information from the Runtime.
+template <typename T>
+class AtomicMethodRefMap {
+ public:
+  explicit AtomicMethodRefMap() {}
+  ~AtomicMethodRefMap() {}
+
+  // Outcome of Insert(): atomically swap `desired` in if the existing value matches `expected`.
+  enum InsertResult {
+    kInsertResultInvalidDexFile,
+    kInsertResultCASFailure,
+    kInsertResultSuccess,
+  };
+  InsertResult Insert(MethodReference ref, const T& expected, const T& desired);
+
+  // Retrieve an item; returns false (leaving *out untouched) if the dex file was not added.
+  bool Get(MethodReference ref, T* out) const;
+
+  // Dex files must be added before method references belonging to them can be used as keys. Not
+  // thread safe.
+  void AddDexFile(const DexFile* dex_file);
+
+  bool HaveDexFile(const DexFile* dex_file) const {
+    return arrays_.find(dex_file) != arrays_.end();
+  }
+
+  // Visit all of the dex files and elements.
+  template <typename Visitor>
+  void Visit(const Visitor& visitor);
+
+ private:
+  // Verified methods. The method array is fixed to avoid needing a lock to extend it.
+  using ElementArray = dchecked_vector<Atomic<T>>;
+  using DexFileArrays = SafeMap<const DexFile*, ElementArray>;
+
+  const ElementArray* GetArray(const DexFile* dex_file) const;
+  ElementArray* GetArray(const DexFile* dex_file);
+
+  DexFileArrays arrays_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_
diff --git a/compiler/utils/atomic_method_ref_map_test.cc b/compiler/utils/atomic_method_ref_map_test.cc
new file mode 100644
index 0000000..9e5bf4b
--- /dev/null
+++ b/compiler/utils/atomic_method_ref_map_test.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "atomic_method_ref_map-inl.h"
+
+#include <memory>
+
+#include "common_runtime_test.h"
+#include "dex_file-inl.h"
+#include "method_reference.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace art {
+
+class AtomicMethodRefMapTest : public CommonRuntimeTest {};
+
+TEST_F(AtomicMethodRefMapTest, RunTests) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("Interfaces"));
+  ASSERT_TRUE(dex != nullptr);
+  using Map = AtomicMethodRefMap<int>;
+  Map map;
+  int value = 123;
+  // Error case: Get on a dex file that has not been added yet.
+  EXPECT_FALSE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_FALSE(map.HaveDexFile(dex.get()));
+  // Error case: Insert on a dex file that has not been added yet.
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, 1) == Map::kInsertResultInvalidDexFile);
+  map.AddDexFile(dex.get());
+  EXPECT_TRUE(map.HaveDexFile(dex.get()));
+  EXPECT_GT(dex->NumMethodIds(), 10u);
+  // After the dex file is added, Get should succeed but return the default value.
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, 0);
+  // Actually insert an item and make sure we can retrieve it.
+  static const int kInsertValue = 44;
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, kInsertValue) ==
+              Map::kInsertResultSuccess);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, kInsertValue);
+  static const int kInsertValue2 = 123;
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 2), 0, kInsertValue2) ==
+              Map::kInsertResultSuccess);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, kInsertValue);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 2), &value));
+  EXPECT_EQ(value, kInsertValue2);
+  // Error case: Incorrect expected value for CAS.
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, kInsertValue + 1) ==
+      Map::kInsertResultCASFailure);
+  // Correctly overwrite the value and verify.
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), kInsertValue, kInsertValue + 1) ==
+      Map::kInsertResultSuccess);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, kInsertValue + 1);
+}
+
+}  // namespace art
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
index 829f34b..293f4cd 100644
--- a/compiler/utils/jni_macro_assembler_test.h
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -39,12 +39,12 @@
 
   typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
 
-  void DriverFn(TestFn f, std::string test_name) {
+  void DriverFn(TestFn f, const std::string& test_name) {
     DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
-  void DriverStr(std::string assembly_string, std::string test_name) {
+  void DriverStr(const std::string& assembly_string, const std::string& test_name) {
     DriverWrapper(assembly_string, test_name);
   }
 
@@ -128,7 +128,7 @@
   virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
   }
 
-  void DriverWrapper(std::string assembly_text, std::string test_name) {
+  void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 46adb3f..184cdf5 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -17,8 +17,11 @@
 #ifndef ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 #define ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 
+#include <type_traits>
 #include <vector>
 
+#include "base/value_object.h"
+
 namespace art {
 
 namespace arm {
@@ -42,17 +45,14 @@
 class X86_64ManagedRegister;
 }
 
-class ManagedRegister {
+class ManagedRegister : public ValueObject {
  public:
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) = default;
 
-  ManagedRegister& operator=(const ManagedRegister& other) {
-    id_ = other.id_;
-    return *this;
-  }
+  ManagedRegister& operator=(const ManagedRegister& other) = default;
 
   constexpr arm::ArmManagedRegister AsArm() const;
   constexpr arm64::Arm64ManagedRegister AsArm64() const;
@@ -85,6 +85,9 @@
   int id_;
 };
 
+static_assert(std::is_trivially_copyable<ManagedRegister>::value,
+              "ManagedRegister should be trivially copyable");
+
 class ManagedRegisterSpill : public ManagedRegister {
  public:
   // ManagedRegisterSpill contains information about data type size and location in caller frame
@@ -115,18 +118,18 @@
  public:
   // The ManagedRegister does not have information about size and offset.
   // In this case it's size and offset determined by BuildFrame (assembler)
-  void push_back(ManagedRegister __x) {
-    ManagedRegisterSpill spill(__x);
+  void push_back(ManagedRegister x) {
+    ManagedRegisterSpill spill(x);
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
-  void push_back(ManagedRegister __x, int32_t __size) {
-    ManagedRegisterSpill spill(__x, __size);
+  void push_back(ManagedRegister x, int32_t size) {
+    ManagedRegisterSpill spill(x, size);
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
-  void push_back(ManagedRegisterSpill __x) {
-    std::vector<ManagedRegisterSpill>::push_back(__x);
+  void push_back(ManagedRegisterSpill x) {
+    std::vector<ManagedRegisterSpill>::push_back(x);
   }
  private:
 };
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 3ef2f94..a52f519 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -219,7 +219,7 @@
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
                                                               mips::MipsLabel*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 75149cf..c24e1b1 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -188,7 +188,7 @@
 
   void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                              mips::MipsLabel*),
-                              std::string instr_name) {
+                              const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -217,7 +217,7 @@
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
                                                               mips::MipsLabel*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 1fdef96..ba8f25e 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -212,7 +212,7 @@
 
   void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                  mips64::Mips64Label*),
-                              std::string instr_name) {
+                              const std::string& instr_name) {
     mips64::Mips64Label label;
     (Base::GetAssembler()->*f)(mips64::A0, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -241,7 +241,7 @@
   void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                   mips64::GpuRegister,
                                                                   mips64::Mips64Label*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips64::Mips64Label label;
     (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
index 0fd9e5b..90335eb 100644
--- a/compiler/utils/string_reference_test.cc
+++ b/compiler/utils/string_reference_test.cc
@@ -18,6 +18,7 @@
 
 #include <memory>
 
+#include "dex_file_types.h"
 #include "gtest/gtest.h"
 #include "utils/test_dex_file_builder.h"
 
@@ -34,15 +35,15 @@
   builder1.AddString("String1");
   std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
   ASSERT_EQ(1u, dex_file1->NumStringIds());
-  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0)));
-  StringReference sr1(dex_file1.get(), 0);
+  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(dex::StringIndex(0))));
+  StringReference sr1(dex_file1.get(), dex::StringIndex(0));
 
   TestDexFileBuilder builder2;
   builder2.AddString("String2");
   std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
   ASSERT_EQ(1u, dex_file2->NumStringIds());
-  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0)));
-  StringReference sr2(dex_file2.get(), 0);
+  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(dex::StringIndex(0))));
+  StringReference sr2(dex_file2.get(), dex::StringIndex(0));
 
   StringReferenceValueComparator cmp;
   EXPECT_TRUE(cmp(sr1, sr2));  // "String1" < "String2" is true.
@@ -80,7 +81,8 @@
   std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
   ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds());
   for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) {
-    ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index)));
+    ASSERT_STREQ(kDexFile1Strings[index],
+                 dex_file1->GetStringData(dex_file1->GetStringId(dex::StringIndex(index))));
   }
 
   TestDexFileBuilder builder2;
@@ -90,14 +92,15 @@
   std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 1");
   ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds());
   for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) {
-    ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index)));
+    ASSERT_STREQ(kDexFile2Strings[index],
+                 dex_file2->GetStringData(dex_file2->GetStringId(dex::StringIndex(index))));
   }
 
   StringReferenceValueComparator cmp;
   for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) {
     for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) {
-      StringReference sr1(dex_file1.get(), index1);
-      StringReference sr2(dex_file2.get(), index2);
+      StringReference sr1(dex_file1.get(), dex::StringIndex(index1));
+      StringReference sr2(dex_file2.get(), dex::StringIndex(index2));
       EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2;
       EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2;
     }
diff --git a/compiler/utils/test_dex_file_builder_test.cc b/compiler/utils/test_dex_file_builder_test.cc
index da4ac79..c76739b 100644
--- a/compiler/utils/test_dex_file_builder_test.cc
+++ b/compiler/utils/test_dex_file_builder_test.cc
@@ -49,7 +49,8 @@
   };
   ASSERT_EQ(arraysize(expected_strings), dex_file->NumStringIds());
   for (size_t i = 0; i != arraysize(expected_strings); ++i) {
-    EXPECT_STREQ(expected_strings[i], dex_file->GetStringData(dex_file->GetStringId(i))) << i;
+    EXPECT_STREQ(expected_strings[i],
+                 dex_file->GetStringData(dex_file->GetStringId(dex::StringIndex(i)))) << i;
   }
 
   static const char* const expected_types[] = {
@@ -62,7 +63,8 @@
   };
   ASSERT_EQ(arraysize(expected_types), dex_file->NumTypeIds());
   for (size_t i = 0; i != arraysize(expected_types); ++i) {
-    EXPECT_STREQ(expected_types[i], dex_file->GetTypeDescriptor(dex_file->GetTypeId(i))) << i;
+    EXPECT_STREQ(expected_types[i],
+                 dex_file->GetTypeDescriptor(dex_file->GetTypeId(dex::TypeIndex(i)))) << i;
   }
 
   ASSERT_EQ(1u, dex_file->NumFieldIds());
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
index d0c1656..a0fa1a4 100644
--- a/compiler/utils/type_reference.h
+++ b/compiler/utils/type_reference.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "dex_file_types.h"
 #include "string_reference.h"
 
 namespace art {
@@ -28,10 +29,10 @@
 
 // A type is located by its DexFile and the string_ids_ table index into that DexFile.
 struct TypeReference {
-  TypeReference(const DexFile* file, uint32_t index) : dex_file(file), type_index(index) { }
+  TypeReference(const DexFile* file, dex::TypeIndex index) : dex_file(file), type_index(index) { }
 
   const DexFile* dex_file;
-  uint32_t type_index;
+  dex::TypeIndex type_index;
 };
 
 // Compare the actual referenced type names. Used for type reference deduplication.
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 9664e43..90fe6da 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -23,6 +23,7 @@
 #include "compiler/driver/compiler_driver.h"
 #include "compiler_callbacks.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "handle_scope-inl.h"
 #include "verifier/method_verifier-inl.h"
 #include "mirror/class_loader.h"
@@ -72,6 +73,25 @@
     return klass;
   }
 
+  void SetupCompilerDriver() {
+    compiler_options_->boot_image_ = false;
+    compiler_driver_->InitializeThreadPools();
+  }
+
+  void VerifyWithCompilerDriver(verifier::VerifierDeps* deps) {
+    TimingLogger timings("Verify", false, false);
+    // The compiler driver handles the verifier deps in the callbacks, so
+    // remove what this class did for unit testing.
+    verifier_deps_.reset(nullptr);
+    callbacks_->SetVerifierDeps(deps);
+    compiler_driver_->Verify(class_loader_, dex_files_, &timings);
+    // The compiler driver may have updated the VerifierDeps in the callback object.
+    if (callbacks_->GetVerifierDeps() != deps) {
+      verifier_deps_.reset(callbacks_->GetVerifierDeps());
+    }
+    callbacks_->SetVerifierDeps(nullptr);
+  }
+
   void SetVerifierDeps(const std::vector<const DexFile*>& dex_files) {
     verifier_deps_.reset(new verifier::VerifierDeps(dex_files));
     VerifierDepsCompilerCallbacks* callbacks =
@@ -79,17 +99,24 @@
     callbacks->SetVerifierDeps(verifier_deps_.get());
   }
 
+  void LoadDexFile(ScopedObjectAccess* soa, const char* name1, const char* name2 = nullptr)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    class_loader_ = (name2 == nullptr) ? LoadDex(name1) : LoadMultiDex(name1, name2);
+    dex_files_ = GetDexFiles(class_loader_);
+    primary_dex_file_ = dex_files_.front();
+
+    SetVerifierDeps(dex_files_);
+    StackHandleScope<1> hs(soa->Self());
+    Handle<mirror::ClassLoader> loader =
+        hs.NewHandle(soa->Decode<mirror::ClassLoader>(class_loader_));
+    for (const DexFile* dex_file : dex_files_) {
+      class_linker_->RegisterDexFile(*dex_file, loader.Get());
+    }
+  }
+
   void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) {
-    class_loader_ = LoadDex("VerifierDeps");
-    std::vector<const DexFile*> dex_files = GetDexFiles(class_loader_);
-    CHECK_EQ(dex_files.size(), 1u);
-    dex_file_ = dex_files.front();
-
-    SetVerifierDeps(dex_files);
-
-    ObjPtr<mirror::ClassLoader> loader = soa->Decode<mirror::ClassLoader>(class_loader_);
-    class_linker_->RegisterDexFile(*dex_file_, loader.Ptr());
-
+    LoadDexFile(soa, "VerifierDeps");
+    CHECK_EQ(dex_files_.size(), 1u);
     klass_Main_ = FindClassByName("LMain;", soa);
     CHECK(klass_Main_ != nullptr);
   }
@@ -98,16 +125,16 @@
     ScopedObjectAccess soa(Thread::Current());
     LoadDexFile(&soa);
 
-    StackHandleScope<2> hs(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_)));
     Handle<mirror::DexCache> dex_cache_handle(hs.NewHandle(klass_Main_->GetDexCache()));
 
     const DexFile::ClassDef* class_def = klass_Main_->GetClassDef();
-    const uint8_t* class_data = dex_file_->GetClassData(*class_def);
+    const uint8_t* class_data = primary_dex_file_->GetClassData(*class_def);
     CHECK(class_data != nullptr);
 
-    ClassDataItemIterator it(*dex_file_, class_data);
+    ClassDataItemIterator it(*primary_dex_file_, class_data);
     while (it.HasNextStaticField() || it.HasNextInstanceField()) {
       it.Next();
     }
@@ -115,7 +142,7 @@
     ArtMethod* method = nullptr;
     while (it.HasNextDirectMethod()) {
       ArtMethod* resolved_method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
-          *dex_file_,
+          *primary_dex_file_,
           it.GetMemberIndex(),
           dex_cache_handle,
           class_loader_handle,
@@ -130,8 +157,9 @@
     }
     CHECK(method != nullptr);
 
+    Thread::Current()->SetVerifierDeps(callbacks_->GetVerifierDeps());
     MethodVerifier verifier(Thread::Current(),
-                            dex_file_,
+                            primary_dex_file_,
                             dex_cache_handle,
                             class_loader_handle,
                             *class_def,
@@ -145,22 +173,17 @@
                             false /* verify to dump */,
                             true /* allow_thread_suspension */);
     verifier.Verify();
+    Thread::Current()->SetVerifierDeps(nullptr);
     return !verifier.HasFailures();
   }
 
-  void VerifyDexFile() {
-    std::string error_msg;
+  void VerifyDexFile(const char* multidex = nullptr) {
     {
       ScopedObjectAccess soa(Thread::Current());
-      LoadDexFile(&soa);
+      LoadDexFile(&soa, "VerifierDeps", multidex);
     }
-    SetVerifierDeps({ dex_file_ });
-    TimingLogger timings("Verify", false, false);
-    std::vector<const DexFile*> dex_files;
-    dex_files.push_back(dex_file_);
-    compiler_options_->boot_image_ = false;
-    compiler_driver_->InitializeThreadPools();
-    compiler_driver_->Verify(class_loader_, dex_files, &timings);
+    SetupCompilerDriver();
+    VerifyWithCompilerDriver(/* verifier_deps */ nullptr);
   }
 
   bool TestAssignabilityRecording(const std::string& dst,
@@ -173,7 +196,7 @@
     DCHECK(klass_dst != nullptr);
     mirror::Class* klass_src = FindClassByName(src, &soa);
     DCHECK(klass_src != nullptr);
-    verifier_deps_->AddAssignability(*dex_file_,
+    verifier_deps_->AddAssignability(*primary_dex_file_,
                                      klass_dst,
                                      klass_src,
                                      is_strict,
@@ -181,13 +204,39 @@
     return true;
   }
 
+  // Check that the status of each class in `class_loader_` matches the
+  // expected status in `deps`.
+  void VerifyClassStatus(const verifier::VerifierDeps& deps) {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader_handle(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_)));
+    MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+    for (const DexFile* dex_file : dex_files_) {
+      const std::vector<dex::TypeIndex>& unverified_classes = deps.GetUnverifiedClasses(*dex_file);
+      std::set<dex::TypeIndex> set(unverified_classes.begin(), unverified_classes.end());
+      for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+        const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+        const char* descriptor = dex_file->GetClassDescriptor(class_def);
+        cls.Assign(class_linker_->FindClass(soa.Self(), descriptor, class_loader_handle));
+        if (cls.Get() == nullptr) {
+          CHECK(soa.Self()->IsExceptionPending());
+          soa.Self()->ClearException();
+        } else if (set.find(class_def.class_idx_) == set.end()) {
+          ASSERT_EQ(cls->GetStatus(), mirror::Class::kStatusVerified);
+        } else {
+          ASSERT_LT(cls->GetStatus(), mirror::Class::kStatusVerified);
+        }
+      }
+    }
+  }
+
   bool HasUnverifiedClass(const std::string& cls) {
-    const DexFile::TypeId* type_id = dex_file_->FindTypeId(cls.c_str());
+    const DexFile::TypeId* type_id = primary_dex_file_->FindTypeId(cls.c_str());
     DCHECK(type_id != nullptr);
-    uint16_t index = dex_file_->GetIndexForTypeId(*type_id);
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+    dex::TypeIndex index = primary_dex_file_->GetIndexForTypeId(*type_id);
     for (const auto& dex_dep : verifier_deps_->dex_deps_) {
-      for (uint16_t entry : dex_dep.second->unverified_classes_) {
+      for (dex::TypeIndex entry : dex_dep.second->unverified_classes_) {
         if (index == entry) {
           return true;
         }
@@ -201,7 +250,6 @@
   bool HasAssignable(const std::string& expected_destination,
                      const std::string& expected_source,
                      bool expected_is_assignable) {
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     for (auto& dex_dep : verifier_deps_->dex_deps_) {
       const DexFile& dex_file = *dex_dep.first;
       auto& storage = expected_is_assignable ? dex_dep.second->assignable_types_
@@ -223,7 +271,6 @@
   bool HasClass(const std::string& expected_klass,
                 bool expected_resolved,
                 const std::string& expected_access_flags = "") {
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     for (auto& dex_dep : verifier_deps_->dex_deps_) {
       for (auto& entry : dex_dep.second->classes_) {
         if (expected_resolved != entry.IsResolved()) {
@@ -258,7 +305,6 @@
                 bool expected_resolved,
                 const std::string& expected_access_flags = "",
                 const std::string& expected_decl_klass = "") {
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     for (auto& dex_dep : verifier_deps_->dex_deps_) {
       for (auto& entry : dex_dep.second->fields_) {
         if (expected_resolved != entry.IsResolved()) {
@@ -312,7 +358,6 @@
                  bool expected_resolved,
                  const std::string& expected_access_flags = "",
                  const std::string& expected_decl_klass = "") {
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     for (auto& dex_dep : verifier_deps_->dex_deps_) {
       auto& storage = (expected_kind == "direct") ? dex_dep.second->direct_methods_
                           : (expected_kind == "virtual") ? dex_dep.second->virtual_methods_
@@ -361,13 +406,10 @@
   }
 
   size_t NumberOfCompiledDexFiles() {
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     return verifier_deps_->dex_deps_.size();
   }
 
   size_t HasEachKindOfRecord() {
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
-
     bool has_strings = false;
     bool has_assignability = false;
     bool has_classes = false;
@@ -395,8 +437,21 @@
            has_unverified_classes;
   }
 
+  static std::set<VerifierDeps::MethodResolution>* GetMethods(
+      VerifierDeps::DexFileDeps* deps, MethodResolutionKind resolution_kind) {
+    if (resolution_kind == kDirectMethodResolution) {
+      return &deps->direct_methods_;
+    } else if (resolution_kind == kVirtualMethodResolution) {
+      return &deps->virtual_methods_;
+    } else {
+      DCHECK_EQ(resolution_kind, kInterfaceMethodResolution);
+      return &deps->interface_methods_;
+    }
+  }
+
   std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
-  const DexFile* dex_file_;
+  std::vector<const DexFile*> dex_files_;
+  const DexFile* primary_dex_file_;
   jobject class_loader_;
   mirror::Class* klass_Main_;
 };
@@ -405,23 +460,21 @@
   ScopedObjectAccess soa(Thread::Current());
   LoadDexFile(&soa);
 
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+  dex::StringIndex id_Main1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
+  ASSERT_LT(id_Main1.index_, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main1));
 
-  uint32_t id_Main1 = verifier_deps_->GetIdFromString(*dex_file_, "LMain;");
-  ASSERT_LT(id_Main1, dex_file_->NumStringIds());
-  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*dex_file_, id_Main1));
+  dex::StringIndex id_Main2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
+  ASSERT_LT(id_Main2.index_, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main2));
 
-  uint32_t id_Main2 = verifier_deps_->GetIdFromString(*dex_file_, "LMain;");
-  ASSERT_LT(id_Main2, dex_file_->NumStringIds());
-  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*dex_file_, id_Main2));
+  dex::StringIndex id_Lorem1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
+  ASSERT_GE(id_Lorem1.index_, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem1));
 
-  uint32_t id_Lorem1 = verifier_deps_->GetIdFromString(*dex_file_, "Lorem ipsum");
-  ASSERT_GE(id_Lorem1, dex_file_->NumStringIds());
-  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*dex_file_, id_Lorem1));
-
-  uint32_t id_Lorem2 = verifier_deps_->GetIdFromString(*dex_file_, "Lorem ipsum");
-  ASSERT_GE(id_Lorem2, dex_file_->NumStringIds());
-  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*dex_file_, id_Lorem2));
+  dex::StringIndex id_Lorem2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
+  ASSERT_GE(id_Lorem2.index_, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem2));
 
   ASSERT_EQ(id_Main1, id_Main2);
   ASSERT_EQ(id_Lorem1, id_Lorem2);
@@ -1068,13 +1121,41 @@
   ASSERT_TRUE(HasEachKindOfRecord());
 
   std::vector<uint8_t> buffer;
-  verifier_deps_->Encode(&buffer);
+  verifier_deps_->Encode(dex_files_, &buffer);
   ASSERT_FALSE(buffer.empty());
 
-  VerifierDeps decoded_deps({ dex_file_ }, ArrayRef<uint8_t>(buffer));
+  VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
   ASSERT_TRUE(verifier_deps_->Equals(decoded_deps));
 }
 
+TEST_F(VerifierDepsTest, EncodeDecodeMulti) {
+  VerifyDexFile("MultiDex");
+
+  ASSERT_GT(NumberOfCompiledDexFiles(), 1u);
+  std::vector<uint8_t> buffer;
+  verifier_deps_->Encode(dex_files_, &buffer);
+  ASSERT_FALSE(buffer.empty());
+
+  // Create new DexFile, to mess with std::map order: the verifier deps used
+  // to iterate over the map, which doesn't guarantee insertion order. We fixed
+  // this by passing the expected order when encoding/decoding.
+  std::vector<std::unique_ptr<const DexFile>> first_dex_files = OpenTestDexFiles("VerifierDeps");
+  std::vector<std::unique_ptr<const DexFile>> second_dex_files = OpenTestDexFiles("MultiDex");
+  std::vector<const DexFile*> dex_files;
+  for (auto& dex_file : first_dex_files) {
+    dex_files.push_back(dex_file.get());
+  }
+  for (auto& dex_file : second_dex_files) {
+    dex_files.push_back(dex_file.get());
+  }
+
+  // Dump the new verifier deps to ensure it can properly read the data.
+  VerifierDeps decoded_deps(dex_files, ArrayRef<const uint8_t>(buffer));
+  std::ostringstream stream;
+  VariableIndentationOutputStream os(&stream);
+  decoded_deps.Dump(&os);
+}
+
 TEST_F(VerifierDepsTest, UnverifiedClasses) {
   VerifyDexFile();
   ASSERT_FALSE(HasUnverifiedClass("LMyThread;"));
@@ -1088,5 +1169,362 @@
   ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;"));
 }
 
+// Returns the next resolution kind in the enum.
+static MethodResolutionKind GetNextResolutionKind(MethodResolutionKind resolution_kind) {
+  if (resolution_kind == kDirectMethodResolution) {
+    return kVirtualMethodResolution;
+  } else if (resolution_kind == kVirtualMethodResolution) {
+    return kInterfaceMethodResolution;
+  } else {
+    DCHECK_EQ(resolution_kind, kInterfaceMethodResolution);
+    return kDirectMethodResolution;
+  }
+}
+
+TEST_F(VerifierDepsTest, VerifyDeps) {
+  VerifyDexFile();
+
+  ASSERT_EQ(1u, NumberOfCompiledDexFiles());
+  ASSERT_TRUE(HasEachKindOfRecord());
+
+  // When validating, we create a new class loader, as
+  // the existing `class_loader_` may contain erroneous classes
+  // that ClassLinker::FindClass won't return.
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  MutableHandle<mirror::ClassLoader> new_class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
+  {
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_TRUE(verifier_deps_->ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  std::vector<uint8_t> buffer;
+  verifier_deps_->Encode(dex_files_, &buffer);
+  ASSERT_FALSE(buffer.empty());
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_TRUE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Fiddle with the dependencies to make sure we catch any change and fail to verify.
+
+  {
+    // Mess with the assignable_types.
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    deps->assignable_types_.insert(*deps->unassignable_types_.begin());
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    // Mess with the unassignable_types.
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    deps->unassignable_types_.insert(*deps->assignable_types_.begin());
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Mess with classes.
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->classes_) {
+      if (entry.IsResolved()) {
+        deps->classes_.insert(VerifierDeps::ClassResolution(
+            entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->classes_) {
+      if (!entry.IsResolved()) {
+        deps->classes_.insert(VerifierDeps::ClassResolution(
+            entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker - 1));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->classes_) {
+      if (entry.IsResolved()) {
+        deps->classes_.insert(VerifierDeps::ClassResolution(
+            entry.GetDexTypeIndex(), entry.GetAccessFlags() - 1));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Mess with fields.
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      if (entry.IsResolved()) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
+                                                           VerifierDeps::kUnresolvedMarker,
+                                                           entry.GetDeclaringClassIndex()));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      if (!entry.IsResolved()) {
+        constexpr dex::StringIndex kStringIndexZero(0);  // We know there is a class there.
+        deps->fields_.insert(VerifierDeps::FieldResolution(0 /* we know there is a field there */,
+                                                           VerifierDeps::kUnresolvedMarker - 1,
+                                                           kStringIndexZero));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      if (entry.IsResolved()) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
+                                                           entry.GetAccessFlags() - 1,
+                                                           entry.GetDeclaringClassIndex()));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      constexpr dex::StringIndex kNewTypeIndex(0);
+      if (entry.GetDeclaringClassIndex() != kNewTypeIndex) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
+                                                           entry.GetAccessFlags(),
+                                                           kNewTypeIndex));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Mess with methods.
+  for (MethodResolutionKind resolution_kind :
+            { kDirectMethodResolution, kVirtualMethodResolution, kInterfaceMethodResolution }) {
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                                         VerifierDeps::kUnresolvedMarker,
+                                                         entry.GetDeclaringClassIndex()));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (!entry.IsResolved()) {
+          constexpr dex::StringIndex kStringIndexZero(0);  // We know there is a class there.
+          methods->insert(VerifierDeps::MethodResolution(0 /* we know there is a method there */,
+                                                         VerifierDeps::kUnresolvedMarker - 1,
+                                                         kStringIndexZero));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                                         entry.GetAccessFlags() - 1,
+                                                         entry.GetDeclaringClassIndex()));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        constexpr dex::StringIndex kNewTypeIndex(0);
+        if (entry.IsResolved() && entry.GetDeclaringClassIndex() != kNewTypeIndex) {
+          methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                                         entry.GetAccessFlags(),
+                                                         kNewTypeIndex));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          GetMethods(deps, GetNextResolutionKind(resolution_kind))->insert(
+              VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                             entry.GetAccessFlags(),
+                                             entry.GetDeclaringClassIndex()));
+          found = true;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          GetMethods(deps, GetNextResolutionKind(GetNextResolutionKind(resolution_kind)))->insert(
+              VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                             entry.GetAccessFlags(),
+                                             entry.GetDeclaringClassIndex()));
+          found = true;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+  }
+}
+
+TEST_F(VerifierDepsTest, CompilerDriver) {
+  SetupCompilerDriver();
+
+  // Run every scenario for both the multi-dex and the single-dex (nullptr) configuration.
+  for (const char* multi : { "MultiDex", static_cast<const char*>(nullptr) }) {
+    // Test that the compiler driver behaves as expected when the dependencies
+    // passed to it are used untouched and when one of them has been tainted.
+    for (bool verify_failure : { false, true }) {
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        LoadDexFile(&soa, "VerifierDeps", multi);
+      }
+      VerifyWithCompilerDriver(/* verifier_deps */ nullptr);  // First run: no input deps.
+
+      std::vector<uint8_t> buffer;
+      verifier_deps_->Encode(dex_files_, &buffer);  // Serialize verifier_deps_ into |buffer|.
+
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        LoadDexFile(&soa, "VerifierDeps", multi);  // Same dex file(s), loaded a second time.
+      }
+      verifier::VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      if (verify_failure) {
+        // Taint the decoded deps: add an unresolved entry for a type recorded as resolved.
+        VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+        bool found = false;
+        for (const auto& entry : deps->classes_) {
+          if (entry.IsResolved()) {
+            deps->classes_.insert(VerifierDeps::ClassResolution(
+                entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
+            found = true;
+            break;  // One tainted entry is enough.
+          }
+        }
+        ASSERT_TRUE(found);
+      }
+      VerifyWithCompilerDriver(&decoded_deps);  // Second run: decoded deps as input.
+
+      if (verify_failure) {
+        ASSERT_FALSE(verifier_deps_ == nullptr);  // Tainted input: new deps expected.
+        ASSERT_FALSE(verifier_deps_->Equals(decoded_deps));
+      } else {
+        ASSERT_TRUE(verifier_deps_ == nullptr);  // Valid input: no new deps expected.
+        VerifyClassStatus(decoded_deps);
+      }
+    }
+  }
+}
+
 }  // namespace verifier
 }  // namespace art
diff --git a/dex2oat/Android.bp b/dex2oat/Android.bp
index 05a5d0f..0924aec 100644
--- a/dex2oat/Android.bp
+++ b/dex2oat/Android.bp
@@ -89,6 +89,7 @@
     ],
     static_libs: [
         "libart-compiler",
+        "libart-dexlayout",
         "libart",
         "libvixl-arm",
         "libvixl-arm64",
@@ -118,6 +119,7 @@
     ],
     static_libs: [
         "libartd-compiler",
+        "libartd-dexlayout",
         "libartd",
         "libvixld-arm",
         "libvixld-arm64",
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 28d6289..91a32f9 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -78,6 +78,7 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "utils.h"
+#include "vdex_file.h"
 #include "verifier/verifier_deps.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
@@ -270,6 +271,7 @@
                 "|balanced"
                 "|speed-profile"
                 "|speed"
+                "|layout-profile"
                 "|everything-profile"
                 "|everything):");
   UsageError("      select compiler filter.");
@@ -517,7 +519,9 @@
       thread_count_(sysconf(_SC_NPROCESSORS_CONF)),
       start_ns_(NanoTime()),
       oat_fd_(-1),
-      vdex_fd_(-1),
+      input_vdex_fd_(-1),
+      output_vdex_fd_(-1),
+      input_vdex_file_(nullptr),
       zip_fd_(-1),
       image_base_(0U),
       image_classes_zip_filename_(nullptr),
@@ -590,8 +594,13 @@
     ParseUintOption(option, "--zip-fd", &zip_fd_, Usage);
   }
 
-  void ParseVdexFd(const StringPiece& option) {
-    ParseUintOption(option, "--vdex-fd", &vdex_fd_, Usage);
+  void ParseInputVdexFd(const StringPiece& option) {
+    // Stores the value of --input-vdex-fd; parsed as signed since the input vdex fd might be -1.
+    ParseIntOption(option, "--input-vdex-fd", &input_vdex_fd_, Usage);
+  }
+
+  void ParseOutputVdexFd(const StringPiece& option) {
+    ParseUintOption(option, "--output-vdex-fd", &output_vdex_fd_, Usage);  // Default: -1 (unset).
   }
 
   void ParseOatFd(const StringPiece& option) {
@@ -703,13 +712,17 @@
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
 
+    if (input_vdex_fd_ != -1 && !input_vdex_.empty()) {
+      Usage("Can't have both --input-vdex-fd and --input-vdex");
+    }
+
     if (!oat_filenames_.empty() && oat_fd_ != -1) {
       Usage("--oat-file should not be used with --oat-fd");
     }
 
-    if ((vdex_fd_ == -1) != (oat_fd_ == -1)) {
+    if ((output_vdex_fd_ == -1) != (oat_fd_ == -1)) {
       Usage("VDEX and OAT output must be specified either with one --oat-filename "
-            "or with --oat-fd and --vdex-fd file descriptors");
+            "or with --oat-fd and --output-vdex-fd file descriptors");
     }
 
     if (!parser_options->oat_symbols.empty() && oat_fd_ != -1) {
@@ -720,8 +733,8 @@
       Usage("--oat-symbols should not be used with --host");
     }
 
-    if (vdex_fd_ != -1 && !image_filenames_.empty()) {
-      Usage("--vdex-fd should not be used with --image");
+    if (output_vdex_fd_ != -1 && !image_filenames_.empty()) {
+      Usage("--output-vdex-fd should not be used with --image");
     }
 
     if (oat_fd_ != -1 && !image_filenames_.empty()) {
@@ -1114,8 +1127,12 @@
         ParseZipFd(option);
       } else if (option.starts_with("--zip-location=")) {
         zip_location_ = option.substr(strlen("--zip-location=")).data();
-      } else if (option.starts_with("--vdex-fd=")) {
-        ParseVdexFd(option);
+      } else if (option.starts_with("--input-vdex-fd=")) {
+        ParseInputVdexFd(option);
+      } else if (option.starts_with("--input-vdex=")) {
+        input_vdex_ = option.substr(strlen("--input-vdex=")).data();
+      } else if (option.starts_with("--output-vdex-fd=")) {
+        ParseOutputVdexFd(option);
       } else if (option.starts_with("--oat-file=")) {
         oat_filenames_.push_back(option.substr(strlen("--oat-file=")).data());
       } else if (option.starts_with("--oat-symbols=")) {
@@ -1257,8 +1274,19 @@
           return false;
         }
         oat_files_.push_back(std::move(oat_file));
+        DCHECK_EQ(input_vdex_fd_, -1);
+        if (!input_vdex_.empty()) {
+          std::string error_msg;
+          input_vdex_file_.reset(VdexFile::Open(input_vdex_,
+                                                /* writable */ false,
+                                                /* low_4gb */ false,
+                                                &error_msg));
+          if (input_vdex_file_ != nullptr && !input_vdex_file_->IsValid()) {
+            input_vdex_file_.reset(nullptr);
+          }
+        }
 
-        DCHECK_EQ(vdex_fd_, -1);
+        DCHECK_EQ(output_vdex_fd_, -1);
         std::string vdex_filename = ReplaceFileExtension(oat_filename, "vdex");
         std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_filename.c_str()));
         if (vdex_file.get() == nullptr) {
@@ -1284,9 +1312,34 @@
       }
       oat_files_.push_back(std::move(oat_file));
 
-      DCHECK_NE(vdex_fd_, -1);
+      DCHECK_NE(input_vdex_fd_, output_vdex_fd_);
+      if (input_vdex_fd_ != -1) {
+        struct stat s;
+        int rc = TEMP_FAILURE_RETRY(fstat(input_vdex_fd_, &s));
+        if (rc == -1) {
+          PLOG(WARNING) << "Failed getting length of vdex file";
+        } else {
+          std::string error_msg;
+          input_vdex_file_.reset(VdexFile::Open(input_vdex_fd_,
+                                                s.st_size,
+                                                "vdex",
+                                                /* writable */ false,
+                                                /* low_4gb */ false,
+                                                &error_msg));
+          // If there's any problem with the passed vdex, just warn and proceed
+          // without it.
+          if (input_vdex_file_ == nullptr) {
+            PLOG(WARNING) << "Failed opening vdex file " << error_msg;
+          } else if (!input_vdex_file_->IsValid()) {
+            PLOG(WARNING) << "Existing vdex file is invalid";
+            input_vdex_file_.reset(nullptr);
+          }
+        }
+      }
+
+      DCHECK_NE(output_vdex_fd_, -1);
       std::string vdex_location = ReplaceFileExtension(oat_location_, "vdex");
-      std::unique_ptr<File> vdex_file(new File(vdex_fd_, vdex_location, /* check_usage */ true));
+      std::unique_ptr<File> vdex_file(new File(output_vdex_fd_, vdex_location, /* check_usage */ true));
       if (vdex_file.get() == nullptr) {
         PLOG(ERROR) << "Failed to create vdex file: " << vdex_location;
         return false;
@@ -1379,7 +1432,6 @@
   // boot class path.
   bool Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
-    art::MemMap::Init();  // For ZipEntry::ExtractToMemMap.
 
     if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) {
       return false;
@@ -1471,13 +1523,18 @@
         // Unzip or copy dex files straight to the oat file.
         std::unique_ptr<MemMap> opened_dex_files_map;
         std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+        // No need to verify the dex file for:
+        // 1) dexlayout, which already verified it
+        // 2) when we have a vdex file, which means it was already verified.
+        bool verify = compiler_options_->GetCompilerFilter() != CompilerFilter::kLayoutProfile &&
+            (input_vdex_file_ == nullptr);
         if (!oat_writers_[i]->WriteAndOpenDexFiles(
             kIsVdexEnabled ? vdex_files_[i].get() : oat_files_[i].get(),
             rodata_.back(),
             instruction_set_,
             instruction_set_features_.get(),
             key_value_store_.get(),
-            /* verify */ true,
+            verify,
             &opened_dex_files_map,
             &opened_dex_files)) {
           return false;
@@ -1497,12 +1554,6 @@
 
     dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
 
-    if (!IsBootImage()) {
-      // Collect verification dependencies when compiling an app.
-      verifier_deps_.reset(new verifier::VerifierDeps(dex_files_));
-      callbacks_->SetVerifierDeps(verifier_deps_.get());
-    }
-
     // We had to postpone the swap decision till now, as this is the point when we actually
     // know about the dex files we're going to use.
 
@@ -1583,6 +1634,9 @@
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
           class_linker->RegisterDexFile(*dex_file,
                                         soa.Decode<mirror::ClassLoader>(class_loader_).Ptr())));
+      // Pre-register dex files so that we can access verification results without locks during
+      // compilation and verification.
+      verification_results_->AddDexFile(dex_file);
     }
 
     return true;
@@ -1660,7 +1714,7 @@
                                      swap_fd_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader_, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, input_vdex_file_.get(), timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1785,13 +1839,13 @@
     {
       TimingLogger::ScopedTiming t2("dex2oat Write VDEX", timings_);
       DCHECK(IsBootImage() || oat_files_.size() == 1u);
-      DCHECK_EQ(IsBootImage(), verifier_deps_ == nullptr);
+      verifier::VerifierDeps* verifier_deps = callbacks_->GetVerifierDeps();
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         File* vdex_file = vdex_files_[i].get();
         std::unique_ptr<BufferedOutputStream> vdex_out(
             MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
 
-        if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps_.get())) {
+        if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps)) {
           LOG(ERROR) << "Failed to write verifier dependencies into VDEX " << vdex_file->GetPath();
           return false;
         }
@@ -2233,7 +2287,14 @@
 
   bool AddDexFileSources() {
     TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
-    if (zip_fd_ != -1) {
+    if (input_vdex_file_ != nullptr) {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      const std::string& name = zip_location_.empty() ? dex_locations_[0] : zip_location_;
+      DCHECK(!name.empty());
+      if (!oat_writers_[0]->AddVdexDexFilesSource(*input_vdex_file_.get(), name.c_str())) {
+        return false;
+      }
+    } else if (zip_fd_ != -1) {
       DCHECK_EQ(oat_writers_.size(), 1u);
       if (!oat_writers_[0]->AddZippedDexFilesSource(File(zip_fd_, /* check_usage */ false),
                                                     zip_location_.c_str())) {
@@ -2271,7 +2332,9 @@
                                                      compiler_options_.get(),
                                                      oat_file.get()));
       elf_writers_.back()->Start();
-      oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
+      bool do_dexlayout = compiler_options_->GetCompilerFilter() == CompilerFilter::kLayoutProfile;
+      oat_writers_.emplace_back(new OatWriter(
+          IsBootImage(), timings_, do_dexlayout ? profile_compilation_info_.get() : nullptr));
     }
   }
 
@@ -2359,6 +2422,11 @@
       LOG(ERROR) << "Failed to create runtime";
       return false;
     }
+
+    // Runtime::Init will rename this thread to be "main". Prefer "dex2oat" so that "top" and
+    // "ps -a" don't change to non-descript "main."
+    SetThreadName(kIsDebugBuild ? "dex2oatd" : "dex2oat");
+
     runtime_.reset(Runtime::Current());
     runtime_->SetInstructionSet(instruction_set_);
     for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
@@ -2582,7 +2650,10 @@
   std::vector<const char*> oat_filenames_;
   std::vector<const char*> oat_unstripped_;
   int oat_fd_;
-  int vdex_fd_;
+  int input_vdex_fd_;
+  int output_vdex_fd_;
+  std::string input_vdex_;
+  std::unique_ptr<VdexFile> input_vdex_file_;
   std::vector<const char*> dex_filenames_;
   std::vector<const char*> dex_locations_;
   int zip_fd_;
@@ -2645,9 +2716,6 @@
   std::vector<std::vector<const DexFile*>> dex_files_per_oat_file_;
   std::unordered_map<const DexFile*, size_t> dex_file_oat_index_map_;
 
-  // Collector of verifier dependencies.
-  std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
-
   // Backing storage.
   std::vector<std::string> char_backing_storage_;
 
@@ -2782,6 +2850,8 @@
     }
   }
 
+  art::MemMap::Init();  // For ZipEntry::ExtractToMemMap, and vdex.
+
   // Check early that the result of compilation can be written
   if (!dex2oat->OpenFile()) {
     return EXIT_FAILURE;
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 58dd047..b6b62a8 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -24,7 +24,9 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/stringprintf.h"
+#include "dex_file-inl.h"
 #include "dex2oat_environment_test.h"
+#include "jit/offline_profiling_info.h"
 #include "oat.h"
 #include "oat_file.h"
 #include "utils.h"
@@ -438,9 +440,7 @@
 
     Copy(GetDexSrc1(), dex_location);
 
-    std::vector<std::string> copy(extra_args);
-
-    GenerateOdexForTest(dex_location, odex_location, filter, copy);
+    GenerateOdexForTest(dex_location, odex_location, filter, extra_args);
 
     CheckValidity();
     ASSERT_TRUE(success_);
@@ -553,4 +553,108 @@
   RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
 }
 
+// Tests that the layout-profile compiler filter swaps class defs 0 and 1 of the test dex file.
+class Dex2oatLayoutTest : public Dex2oatTest {
+ protected:
+  void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
+                   CompilerFilter::Filter result ATTRIBUTE_UNUSED) OVERRIDE {
+    // Ignore, we'll do our own checks.
+  }
+
+  // Emits a profile with a single dex file with the given location and a single class index of 1.
+  void GenerateProfile(const std::string& test_profile,
+                       const std::string& dex_location,
+                       uint32_t checksum) {
+    int profile_test_fd = open(test_profile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
+    CHECK_GE(profile_test_fd, 0);
+    ProfileCompilationInfo info;
+    std::string profile_key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
+    info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1));
+    bool result = info.Save(profile_test_fd);
+    close(profile_test_fd);
+    ASSERT_TRUE(result);
+  }
+
+  // Copies the test dex file, generates a profile for it, compiles it and checks the result.
+  void RunTest() {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string profile_location = GetScratchDir() + "/primary.prof";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+
+    Copy(GetDexSrc2(), dex_location);
+    const char* location = dex_location.c_str();
+    std::string error_msg;
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    ASSERT_TRUE(DexFile::Open(location, location, true, &error_msg, &dex_files)) << error_msg;
+    // Must be fatal: dex_files[0] is dereferenced right below.
+    ASSERT_EQ(dex_files.size(), 1U);
+    GenerateProfile(profile_location, dex_location, dex_files[0]->GetLocationChecksum());
+
+    const std::vector<std::string> extra_args = { "--profile-file=" + profile_location };
+    GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kLayoutProfile, extra_args);
+    CheckValidity();
+    ASSERT_TRUE(success_);
+    CheckResult(dex_location, odex_location);
+  }
+
+  // Host/target independent checks of the odex file compiled from |dex_location|.
+  void CheckResult(const std::string& dex_location, const std::string& odex_location) {
+    std::string error_msg;
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                     odex_location.c_str(),
+                                                     nullptr,
+                                                     nullptr,
+                                                     false,
+                                                     /*low_4gb*/false,
+                                                     dex_location.c_str(),
+                                                     &error_msg));
+    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+
+    const char* location = dex_location.c_str();
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    ASSERT_TRUE(DexFile::Open(location, location, true, &error_msg, &dex_files)) << error_msg;
+    ASSERT_EQ(dex_files.size(), 1U);
+    std::unique_ptr<const DexFile>& old_dex_file = dex_files[0];
+
+    for (const OatDexFile* oat_dex_file : odex_file->GetOatDexFiles()) {
+      std::unique_ptr<const DexFile> new_dex_file = oat_dex_file->OpenDexFile(&error_msg);
+      ASSERT_TRUE(new_dex_file != nullptr) << error_msg;
+      uint32_t class_def_count = new_dex_file->NumClassDefs();
+      ASSERT_LT(class_def_count, std::numeric_limits<uint16_t>::max());
+      ASSERT_GE(class_def_count, 2U);
+      // The new layout swaps the classes at indexes 0 and 1.
+      std::string old_class0 = old_dex_file->PrettyType(old_dex_file->GetClassDef(0).class_idx_);
+      std::string old_class1 = old_dex_file->PrettyType(old_dex_file->GetClassDef(1).class_idx_);
+      std::string new_class0 = new_dex_file->PrettyType(new_dex_file->GetClassDef(0).class_idx_);
+      std::string new_class1 = new_dex_file->PrettyType(new_dex_file->GetClassDef(1).class_idx_);
+      EXPECT_EQ(old_class0, new_class1);
+      EXPECT_EQ(old_class1, new_class0);
+    }
+
+    EXPECT_EQ(odex_file->GetCompilerFilter(), CompilerFilter::kLayoutProfile);
+  }
+
+  // Check whether the dex2oat run was really successful.
+  void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
+    }
+  }
+
+  void CheckTargetValidity() {
+    // TODO: Ignore for now.
+  }
+
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
+  void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
+
+TEST_F(Dex2oatLayoutTest, TestLayout) {
+  RunTest();  // Compiles with the layout-profile filter and checks the resulting odex.
+}
+
 }  // namespace art
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 30de28e..916984c 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -45,6 +45,7 @@
 #include "base/stringprintf.h"
 #include "dexdump_cfg.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "dex_instruction-inl.h"
 
 namespace art {
@@ -474,15 +475,15 @@
     case DexFile::kDexAnnotationString: {
       const u4 idx = static_cast<u4>(readVarWidth(data, arg, false));
       if (gOptions.outputFormat == OUTPUT_PLAIN) {
-        dumpEscapedString(pDexFile->StringDataByIdx(idx));
+        dumpEscapedString(pDexFile->StringDataByIdx(dex::StringIndex(idx)));
       } else {
-        dumpXmlAttribute(pDexFile->StringDataByIdx(idx));
+        dumpXmlAttribute(pDexFile->StringDataByIdx(dex::StringIndex(idx)));
       }
       break;
     }
     case DexFile::kDexAnnotationType: {
       const u4 str_idx = static_cast<u4>(readVarWidth(data, arg, false));
-      fputs(pDexFile->StringByTypeIdx(str_idx), gOutFile);
+      fputs(pDexFile->StringByTypeIdx(dex::TypeIndex(str_idx)), gOutFile);
       break;
     }
     case DexFile::kDexAnnotationField:
@@ -511,13 +512,13 @@
     }
     case DexFile::kDexAnnotationAnnotation: {
       const u4 type_idx = DecodeUnsignedLeb128(data);
-      fputs(pDexFile->StringByTypeIdx(type_idx), gOutFile);
+      fputs(pDexFile->StringByTypeIdx(dex::TypeIndex(type_idx)), gOutFile);
       // Decode and display all name=value pairs.
       const u4 size = DecodeUnsignedLeb128(data);
       for (u4 i = 0; i < size; i++) {
         const u4 name_idx = DecodeUnsignedLeb128(data);
         fputc(' ', gOutFile);
-        fputs(pDexFile->StringDataByIdx(name_idx), gOutFile);
+        fputs(pDexFile->StringDataByIdx(dex::StringIndex(name_idx)), gOutFile);
         fputc('=', gOutFile);
         dumpEncodedValue(pDexFile, data);
       }
@@ -592,13 +593,13 @@
   // General class information.
   const DexFile::ClassDef& pClassDef = pDexFile->GetClassDef(idx);
   fprintf(gOutFile, "Class #%d header:\n", idx);
-  fprintf(gOutFile, "class_idx           : %d\n", pClassDef.class_idx_);
+  fprintf(gOutFile, "class_idx           : %d\n", pClassDef.class_idx_.index_);
   fprintf(gOutFile, "access_flags        : %d (0x%04x)\n",
           pClassDef.access_flags_, pClassDef.access_flags_);
-  fprintf(gOutFile, "superclass_idx      : %d\n", pClassDef.superclass_idx_);
+  fprintf(gOutFile, "superclass_idx      : %d\n", pClassDef.superclass_idx_.index_);
   fprintf(gOutFile, "interfaces_off      : %d (0x%06x)\n",
           pClassDef.interfaces_off_, pClassDef.interfaces_off_);
-  fprintf(gOutFile, "source_file_idx     : %d\n", pClassDef.source_file_idx_);
+  fprintf(gOutFile, "source_file_idx     : %d\n", pClassDef.source_file_idx_.index_);
   fprintf(gOutFile, "annotations_off     : %d (0x%06x)\n",
           pClassDef.annotations_off_, pClassDef.annotations_off_);
   fprintf(gOutFile, "class_data_off      : %d (0x%06x)\n",
@@ -747,9 +748,8 @@
     const u4 end = start + pTry->insn_count_;
     fprintf(gOutFile, "        0x%04x - 0x%04x\n", start, end);
     for (CatchHandlerIterator it(*pCode, *pTry); it.HasNext(); it.Next()) {
-      const u2 tidx = it.GetHandlerTypeIndex();
-      const char* descriptor =
-          (tidx == DexFile::kDexNoIndex16) ? "<any>" : pDexFile->StringByTypeIdx(tidx);
+      const dex::TypeIndex tidx = it.GetHandlerTypeIndex();
+      const char* descriptor = (!tidx.IsValid()) ? "<any>" : pDexFile->StringByTypeIdx(tidx);
       fprintf(gOutFile, "          %s -> 0x%04x\n", descriptor, it.GetHandlerAddress());
     }  // for
   }  // for
@@ -834,7 +834,7 @@
       break;
     case Instruction::kIndexTypeRef:
       if (index < pDexFile->GetHeader().type_ids_size_) {
-        const char* tp = pDexFile->StringByTypeIdx(index);
+        const char* tp = pDexFile->StringByTypeIdx(dex::TypeIndex(index));
         outSize = snprintf(buf.get(), bufSize, "%s // type@%0*x", tp, width, index);
       } else {
         outSize = snprintf(buf.get(), bufSize, "<type?> // type@%0*x", width, index);
@@ -842,7 +842,7 @@
       break;
     case Instruction::kIndexStringRef:
       if (index < pDexFile->GetHeader().string_ids_size_) {
-        const char* st = pDexFile->StringDataByIdx(index);
+        const char* st = pDexFile->StringDataByIdx(dex::StringIndex(index));
         outSize = snprintf(buf.get(), bufSize, "\"%s\" // string@%0*x", st, width, index);
       } else {
         outSize = snprintf(buf.get(), bufSize, "<string?> // string@%0*x", width, index);
@@ -1461,7 +1461,7 @@
   // General class information.
   char* accessStr = createAccessFlagStr(pClassDef.access_flags_, kAccessForClass);
   const char* superclassDescriptor;
-  if (pClassDef.superclass_idx_ == DexFile::kDexNoIndex16) {
+  if (!pClassDef.superclass_idx_.IsValid()) {
     superclassDescriptor = nullptr;
   } else {
     superclassDescriptor = pDexFile->StringByTypeIdx(pClassDef.superclass_idx_);
@@ -1564,13 +1564,13 @@
   // End of class.
   if (gOptions.outputFormat == OUTPUT_PLAIN) {
     const char* fileName;
-    if (pClassDef.source_file_idx_ != DexFile::kDexNoIndex) {
+    if (pClassDef.source_file_idx_.IsValid()) {
       fileName = pDexFile->StringDataByIdx(pClassDef.source_file_idx_);
     } else {
       fileName = "unknown";
     }
     fprintf(gOutFile, "  source_file_idx   : %d (%s)\n\n",
-            pClassDef.source_file_idx_, fileName);
+            pClassDef.source_file_idx_.index_, fileName);
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "</class>\n");
   }
diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp
index b9266f7..9ee9ebd 100644
--- a/dexlayout/Android.bp
+++ b/dexlayout/Android.bp
@@ -12,28 +12,46 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-art_cc_binary {
-    name: "dexlayout",
+art_cc_defaults {
+    name: "libart-dexlayout-defaults",
     host_supported: true,
     srcs: [
-        "dexlayout_main.cc",
         "dexlayout.cc",
         "dex_ir.cc",
         "dex_ir_builder.cc",
         "dex_visualize.cc",
         "dex_writer.cc",
     ],
+    export_include_dirs: ["."],
+    shared_libs: ["libbase"],
+    static_libs: ["libz"],
+}
+
+art_cc_library {
+    name: "libart-dexlayout",
+    defaults: ["libart-dexlayout-defaults"],
+    shared_libs: ["libart"],
+}
+
+art_cc_library {
+    name: "libartd-dexlayout",
+    defaults: ["libart-dexlayout-defaults"],
+    shared_libs: ["libartd"],
+}
+
+art_cc_binary {
+    name: "dexlayout",
+    host_supported: true,
+    srcs: ["dexlayout_main.cc"],
     cflags: ["-Wall"],
     shared_libs: [
         "libart",
-        "libbase",
+        "libart-dexlayout",
     ],
 }
 
 art_cc_test {
     name: "art_dexlayout_tests",
-    defaults: [
-        "art_gtest_defaults",
-    ],
+    defaults: ["art_gtest_defaults"],
     srcs: ["dexlayout_test.cc"],
 }
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index c3c763f..b1e66be 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -56,6 +56,36 @@
                     entry.end_address_, entry.reg_)));
 }
 
+// Computes the size in bytes of a code item as laid out in the dex file. The item ends after
+// its instructions when it has no try items, and after its last catch handler otherwise.
+static uint32_t GetCodeItemSize(const DexFile& dex_file, const DexFile::CodeItem& disk_code_item) {
+  const uintptr_t item_begin = reinterpret_cast<uintptr_t>(&disk_code_item);
+  const uint32_t num_tries = disk_code_item.tries_size_;
+  if (num_tries == 0) {
+    // No try items: measure up to the end of the instruction array.
+    const uint32_t num_code_units = disk_code_item.insns_size_in_code_units_;
+    return reinterpret_cast<uintptr_t>(&disk_code_item.insns_[num_code_units]) - item_begin;
+  }
+
+  // Iterate over the try items to find the last catch handler, i.e. the largest offset.
+  uint32_t max_handler_off = 0;
+  for (uint32_t try_idx = 0; try_idx != num_tries; ++try_idx) {
+    const uint16_t off = dex_file.GetTryItems(disk_code_item, try_idx)->handler_off_;
+    max_handler_off = (off > max_handler_off) ? off : max_handler_off;
+  }
+
+  // Decode the final handler to see where it ends.
+  const uint8_t* cursor = DexFile::GetCatchHandlerData(disk_code_item, max_handler_off);
+  const int32_t handler_size = DecodeSignedLeb128(&cursor);
+  // Positive size: two ULEB128 values per typed handler. Otherwise the same for |size| typed
+  // handlers, plus one more ULEB128 value for the catch-all address.
+  const int32_t num_uleb128 = (handler_size > 0) ? handler_size * 2 : -handler_size * 2 + 1;
+  for (int32_t n = 0; n < num_uleb128; ++n) {
+    DecodeUnsignedLeb128(&cursor);
+  }
+  return reinterpret_cast<uintptr_t>(cursor) - item_begin;
+}
+
 static uint32_t GetDebugInfoStreamSize(const uint8_t* debug_info_stream) {
   const uint8_t* stream = debug_info_stream;
   DecodeUnsignedLeb128(&stream);  // line_start
@@ -310,7 +340,7 @@
 }
 
 void Collections::CreateStringId(const DexFile& dex_file, uint32_t i) {
-  const DexFile::StringId& disk_string_id = dex_file.GetStringId(i);
+  const DexFile::StringId& disk_string_id = dex_file.GetStringId(dex::StringIndex(i));
   StringData* string_data = new StringData(dex_file.GetStringData(disk_string_id));
   string_datas_.AddItem(string_data, disk_string_id.string_data_off_);
 
@@ -319,8 +349,8 @@
 }
 
 void Collections::CreateTypeId(const DexFile& dex_file, uint32_t i) {
-  const DexFile::TypeId& disk_type_id = dex_file.GetTypeId(i);
-  TypeId* type_id = new TypeId(GetStringId(disk_type_id.descriptor_idx_));
+  const DexFile::TypeId& disk_type_id = dex_file.GetTypeId(dex::TypeIndex(i));
+  TypeId* type_id = new TypeId(GetStringId(disk_type_id.descriptor_idx_.index_));
   type_ids_.AddIndexedItem(type_id, TypeIdsOffset() + i * TypeId::ItemSize(), i);
 }
 
@@ -329,38 +359,38 @@
   const DexFile::TypeList* type_list = dex_file.GetProtoParameters(disk_proto_id);
   TypeList* parameter_type_list = CreateTypeList(type_list, disk_proto_id.parameters_off_);
 
-  ProtoId* proto_id = new ProtoId(GetStringId(disk_proto_id.shorty_idx_),
-                                  GetTypeId(disk_proto_id.return_type_idx_),
+  ProtoId* proto_id = new ProtoId(GetStringId(disk_proto_id.shorty_idx_.index_),
+                                  GetTypeId(disk_proto_id.return_type_idx_.index_),
                                   parameter_type_list);
   proto_ids_.AddIndexedItem(proto_id, ProtoIdsOffset() + i * ProtoId::ItemSize(), i);
 }
 
 void Collections::CreateFieldId(const DexFile& dex_file, uint32_t i) {
   const DexFile::FieldId& disk_field_id = dex_file.GetFieldId(i);
-  FieldId* field_id = new FieldId(GetTypeId(disk_field_id.class_idx_),
-                                  GetTypeId(disk_field_id.type_idx_),
-                                  GetStringId(disk_field_id.name_idx_));
+  FieldId* field_id = new FieldId(GetTypeId(disk_field_id.class_idx_.index_),
+                                  GetTypeId(disk_field_id.type_idx_.index_),
+                                  GetStringId(disk_field_id.name_idx_.index_));
   field_ids_.AddIndexedItem(field_id, FieldIdsOffset() + i * FieldId::ItemSize(), i);
 }
 
 void Collections::CreateMethodId(const DexFile& dex_file, uint32_t i) {
   const DexFile::MethodId& disk_method_id = dex_file.GetMethodId(i);
-  MethodId* method_id = new MethodId(GetTypeId(disk_method_id.class_idx_),
+  MethodId* method_id = new MethodId(GetTypeId(disk_method_id.class_idx_.index_),
                                      GetProtoId(disk_method_id.proto_idx_),
-                                     GetStringId(disk_method_id.name_idx_));
+                                     GetStringId(disk_method_id.name_idx_.index_));
   method_ids_.AddIndexedItem(method_id, MethodIdsOffset() + i * MethodId::ItemSize(), i);
 }
 
 void Collections::CreateClassDef(const DexFile& dex_file, uint32_t i) {
   const DexFile::ClassDef& disk_class_def = dex_file.GetClassDef(i);
-  const TypeId* class_type = GetTypeId(disk_class_def.class_idx_);
+  const TypeId* class_type = GetTypeId(disk_class_def.class_idx_.index_);
   uint32_t access_flags = disk_class_def.access_flags_;
-  const TypeId* superclass = GetTypeIdOrNullPtr(disk_class_def.superclass_idx_);
+  const TypeId* superclass = GetTypeIdOrNullPtr(disk_class_def.superclass_idx_.index_);
 
   const DexFile::TypeList* type_list = dex_file.GetInterfacesList(disk_class_def);
   TypeList* interfaces_type_list = CreateTypeList(type_list, disk_class_def.interfaces_off_);
 
-  const StringId* source_file = GetStringIdOrNullPtr(disk_class_def.source_file_idx_);
+  const StringId* source_file = GetStringIdOrNullPtr(disk_class_def.source_file_idx_.index_);
   // Annotations.
   AnnotationsDirectoryItem* annotations = nullptr;
   const DexFile::AnnotationsDirectoryItem* disk_annotations_directory_item =
@@ -384,16 +414,14 @@
   if (dex_type_list == nullptr) {
     return nullptr;
   }
-  // TODO: Create more efficient lookup for existing type lists.
-  for (std::unique_ptr<TypeList>& type_list : TypeLists()) {
-    if (type_list->GetOffset() == offset) {
-      return type_list.get();
-    }
+  auto found_type_list = TypeLists().find(offset);
+  if (found_type_list != TypeLists().end()) {
+    return found_type_list->second.get();
   }
   TypeIdVector* type_vector = new TypeIdVector();
   uint32_t size = dex_type_list->Size();
   for (uint32_t index = 0; index < size; ++index) {
-    type_vector->push_back(GetTypeId(dex_type_list->GetTypeItem(index).type_idx_));
+    type_vector->push_back(GetTypeId(dex_type_list->GetTypeItem(index).type_idx_.index_));
   }
   TypeList* new_type_list = new TypeList(type_vector);
   type_lists_.AddItem(new_type_list, offset);
@@ -404,10 +432,9 @@
   if (static_data == nullptr) {
     return nullptr;
   }
-  for (std::unique_ptr<EncodedArrayItem>& existing_array_item : EncodedArrayItems()) {
-    if (existing_array_item->GetOffset() == offset) {
-      return existing_array_item.get();
-    }
+  auto found_encoded_array_item = EncodedArrayItems().find(offset);
+  if (found_encoded_array_item != EncodedArrayItems().end()) {
+    return found_encoded_array_item->second.get();
   }
   uint32_t size = DecodeUnsignedLeb128(&static_data);
   EncodedValueVector* values = new EncodedValueVector();
@@ -422,10 +449,9 @@
 
 AnnotationItem* Collections::CreateAnnotationItem(const DexFile::AnnotationItem* annotation,
                                                   uint32_t offset) {
-  for (std::unique_ptr<AnnotationItem>& existing_annotation_item : AnnotationItems()) {
-    if (existing_annotation_item->GetOffset() == offset) {
-      return existing_annotation_item.get();
-    }
+  auto found_annotation_item = AnnotationItems().find(offset);
+  if (found_annotation_item != AnnotationItems().end()) {
+    return found_annotation_item->second.get();
   }
   uint8_t visibility = annotation->visibility_;
   const uint8_t* annotation_data = annotation->annotation_;
@@ -444,10 +470,9 @@
   if (disk_annotations_item.size_ == 0 && offset == 0) {
     return nullptr;
   }
-  for (std::unique_ptr<AnnotationSetItem>& existing_anno_set_item : AnnotationSetItems()) {
-    if (existing_anno_set_item->GetOffset() == offset) {
-      return existing_anno_set_item.get();
-    }
+  auto found_anno_set_item = AnnotationSetItems().find(offset);
+  if (found_anno_set_item != AnnotationSetItems().end()) {
+    return found_anno_set_item->second.get();
   }
   std::vector<AnnotationItem*>* items = new std::vector<AnnotationItem*>();
   for (uint32_t i = 0; i < disk_annotations_item.size_; ++i) {
@@ -467,10 +492,9 @@
 
 AnnotationsDirectoryItem* Collections::CreateAnnotationsDirectoryItem(const DexFile& dex_file,
     const DexFile::AnnotationsDirectoryItem* disk_annotations_item, uint32_t offset) {
-  for (std::unique_ptr<AnnotationsDirectoryItem>& anno_dir_item : AnnotationsDirectoryItems()) {
-    if (anno_dir_item->GetOffset() == offset) {
-      return anno_dir_item.get();
-    }
+  auto found_anno_dir_item = AnnotationsDirectoryItems().find(offset);
+  if (found_anno_dir_item != AnnotationsDirectoryItems().end()) {
+    return found_anno_dir_item->second.get();
   }
   const DexFile::AnnotationSetItem* class_set_item =
       dex_file.GetClassAnnotationSet(disk_annotations_item);
@@ -535,11 +559,9 @@
     const DexFile& dex_file, MethodId* method_id,
     const DexFile::AnnotationSetRefList* annotation_set_ref_list, uint32_t offset) {
   AnnotationSetRefList* set_ref_list = nullptr;
-  for (std::unique_ptr<AnnotationSetRefList>& existing_set_ref_list : AnnotationSetRefLists()) {
-    if (existing_set_ref_list->GetOffset() == offset) {
-      set_ref_list = existing_set_ref_list.get();
-      break;
-    }
+  auto found_set_ref_list = AnnotationSetRefLists().find(offset);
+  if (found_set_ref_list != AnnotationSetRefLists().end()) {
+    set_ref_list = found_set_ref_list->second.get();
   }
   if (set_ref_list == nullptr) {
     std::vector<AnnotationSetItem*>* annotations = new std::vector<AnnotationSetItem*>();
@@ -597,8 +619,8 @@
         bool catch_all = false;
         TypeAddrPairVector* addr_pairs = new TypeAddrPairVector();
         for (CatchHandlerIterator it(disk_code_item, *disk_try_item); it.HasNext(); it.Next()) {
-          const uint16_t type_index = it.GetHandlerTypeIndex();
-          const TypeId* type_id = GetTypeIdOrNullPtr(type_index);
+          const dex::TypeIndex type_index = it.GetHandlerTypeIndex();
+          const TypeId* type_id = GetTypeIdOrNullPtr(type_index.index_);
           catch_all |= type_id == nullptr;
           addr_pairs->push_back(std::unique_ptr<const TypeAddrPair>(
               new TypeAddrPair(type_id, it.GetHandlerAddress())));
@@ -610,9 +632,10 @@
       tries->push_back(std::unique_ptr<const TryItem>(try_item));
     }
   }
-  // TODO: Calculate the size of the code item.
+  uint32_t size = GetCodeItemSize(dex_file, disk_code_item);
   CodeItem* code_item = new CodeItem(
       registers_size, ins_size, outs_size, debug_info, insns_size, insns, tries, handler_list);
+  code_item->SetSize(size);
   code_items_.AddItem(code_item, offset);
   // Add "fixup" references to types, strings, methods, and fields.
   // This is temporary, as we will probably want more detailed parsing of the
@@ -690,8 +713,8 @@
       virtual_methods->push_back(
           std::unique_ptr<MethodItem>(GenerateMethodItem(dex_file, cdii)));
     }
-    // TODO: Calculate the size of the class data.
     class_data = new ClassData(static_fields, instance_fields, direct_methods, virtual_methods);
+    class_data->SetSize(cdii.EndDataPointer() - encoded_data);
     class_datas_.AddItem(class_data, offset);
   }
   return class_data;
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 38eb0b1..a2d1190 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -19,6 +19,7 @@
 #ifndef ART_DEXLAYOUT_DEX_IR_H_
 #define ART_DEXLAYOUT_DEX_IR_H_
 
+#include <map>
 #include <vector>
 #include <stdint.h>
 
@@ -98,34 +99,52 @@
 };
 
 // Collections become owners of the objects added by moving them into unique pointers.
-template<class T> class CollectionWithOffset {
+template<class T> class CollectionBase {  // Offset bookkeeping shared by derived collections.
  public:
-  CollectionWithOffset() = default;
-  std::vector<std::unique_ptr<T>>& Collection() { return collection_; }
-  // Read-time support methods
-  void AddItem(T* object, uint32_t offset) {
-    object->SetOffset(offset);
-    collection_.push_back(std::unique_ptr<T>(object));
-  }
+  CollectionBase() = default;
+
+  uint32_t GetOffset() const { return offset_; }
+  void SetOffset(uint32_t new_offset) { offset_ = new_offset; }
+
+ private:
+  uint32_t offset_ = 0;  // Stays 0 until SetOffset() is called.
+
+  DISALLOW_COPY_AND_ASSIGN(CollectionBase);
+};
+
+template<class T> class CollectionVector : public CollectionBase<T> {  // Insertion-ordered.
+ public:
+  CollectionVector() = default;
+
   void AddIndexedItem(T* object, uint32_t offset, uint32_t index) {
     object->SetOffset(offset);
     object->SetIndex(index);
     collection_.push_back(std::unique_ptr<T>(object));
   }
-  // Ordinary object insertion into collection.
-  void Insert(T object ATTRIBUTE_UNUSED) {
-    // TODO(sehr): add ordered insertion support.
-    UNIMPLEMENTED(FATAL) << "Insertion not ready";
-  }
-  uint32_t GetOffset() const { return offset_; }
-  void SetOffset(uint32_t new_offset) { offset_ = new_offset; }
   uint32_t Size() const { return collection_.size(); }
+  std::vector<std::unique_ptr<T>>& Collection() { return collection_; }  // Mutable view.
 
  private:
   std::vector<std::unique_ptr<T>> collection_;
-  uint32_t offset_ = 0;
 
-  DISALLOW_COPY_AND_ASSIGN(CollectionWithOffset);
+  DISALLOW_COPY_AND_ASSIGN(CollectionVector);
+};
+
+template<class T> class CollectionMap : public CollectionBase<T> {  // Items keyed by offset.
+ public:
+  CollectionMap() = default;
+
+  void AddItem(T* object, uint32_t offset) {  // Takes ownership of object.
+    object->SetOffset(offset);
+    collection_.emplace(offset, std::unique_ptr<T>(object));  // NOTE: duplicate key frees object.
+  }
+  uint32_t Size() const { return collection_.size(); }
+  std::map<uint32_t, std::unique_ptr<T>>& Collection() { return collection_; }
+
+ private:
+  std::map<uint32_t, std::unique_ptr<T>> collection_;  // Iterates in ascending offset order.
+
+  DISALLOW_COPY_AND_ASSIGN(CollectionMap);
 };
 
 class Collections {
@@ -138,22 +157,23 @@
   std::vector<std::unique_ptr<FieldId>>& FieldIds() { return field_ids_.Collection(); }
   std::vector<std::unique_ptr<MethodId>>& MethodIds() { return method_ids_.Collection(); }
   std::vector<std::unique_ptr<ClassDef>>& ClassDefs() { return class_defs_.Collection(); }
-  std::vector<std::unique_ptr<StringData>>& StringDatas() { return string_datas_.Collection(); }
-  std::vector<std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); }
-  std::vector<std::unique_ptr<EncodedArrayItem>>& EncodedArrayItems()
+  std::map<uint32_t, std::unique_ptr<StringData>>& StringDatas()
+      { return string_datas_.Collection(); }
+  std::map<uint32_t, std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); }
+  std::map<uint32_t, std::unique_ptr<EncodedArrayItem>>& EncodedArrayItems()
       { return encoded_array_items_.Collection(); }
-  std::vector<std::unique_ptr<AnnotationItem>>& AnnotationItems()
+  std::map<uint32_t, std::unique_ptr<AnnotationItem>>& AnnotationItems()
       { return annotation_items_.Collection(); }
-  std::vector<std::unique_ptr<AnnotationSetItem>>& AnnotationSetItems()
+  std::map<uint32_t, std::unique_ptr<AnnotationSetItem>>& AnnotationSetItems()
       { return annotation_set_items_.Collection(); }
-  std::vector<std::unique_ptr<AnnotationSetRefList>>& AnnotationSetRefLists()
+  std::map<uint32_t, std::unique_ptr<AnnotationSetRefList>>& AnnotationSetRefLists()
       { return annotation_set_ref_lists_.Collection(); }
-  std::vector<std::unique_ptr<AnnotationsDirectoryItem>>& AnnotationsDirectoryItems()
+  std::map<uint32_t, std::unique_ptr<AnnotationsDirectoryItem>>& AnnotationsDirectoryItems()
       { return annotations_directory_items_.Collection(); }
-  std::vector<std::unique_ptr<DebugInfoItem>>& DebugInfoItems()
+  std::map<uint32_t, std::unique_ptr<DebugInfoItem>>& DebugInfoItems()
       { return debug_info_items_.Collection(); }
-  std::vector<std::unique_ptr<CodeItem>>& CodeItems() { return code_items_.Collection(); }
-  std::vector<std::unique_ptr<ClassData>>& ClassDatas() { return class_datas_.Collection(); }
+  std::map<uint32_t, std::unique_ptr<CodeItem>>& CodeItems() { return code_items_.Collection(); }
+  std::map<uint32_t, std::unique_ptr<ClassData>>& ClassDatas() { return class_datas_.Collection(); }
 
   void CreateStringId(const DexFile& dex_file, uint32_t i);
   void CreateTypeId(const DexFile& dex_file, uint32_t i);
@@ -204,7 +224,7 @@
   uint32_t DebugInfoItemsOffset() const { return debug_info_items_.GetOffset(); }
   uint32_t CodeItemsOffset() const { return code_items_.GetOffset(); }
   uint32_t ClassDatasOffset() const { return class_datas_.GetOffset(); }
-  uint32_t MapItemOffset() const { return map_item_offset_; }
+  uint32_t MapListOffset() const { return map_list_offset_; }
 
   void SetStringIdsOffset(uint32_t new_offset) { string_ids_.SetOffset(new_offset); }
   void SetTypeIdsOffset(uint32_t new_offset) { type_ids_.SetOffset(new_offset); }
@@ -226,7 +246,7 @@
   void SetDebugInfoItemsOffset(uint32_t new_offset) { debug_info_items_.SetOffset(new_offset); }
   void SetCodeItemsOffset(uint32_t new_offset) { code_items_.SetOffset(new_offset); }
   void SetClassDatasOffset(uint32_t new_offset) { class_datas_.SetOffset(new_offset); }
-  void SetMapItemOffset(uint32_t new_offset) { map_item_offset_ = new_offset; }
+  void SetMapListOffset(uint32_t new_offset) { map_list_offset_ = new_offset; }
 
   uint32_t StringIdsSize() const { return string_ids_.Size(); }
   uint32_t TypeIdsSize() const { return type_ids_.Size(); }
@@ -254,25 +274,25 @@
       const DexFile::AnnotationSetRefList* annotation_set_ref_list, uint32_t offset);
   MethodItem* GenerateMethodItem(const DexFile& dex_file, ClassDataItemIterator& cdii);
 
-  CollectionWithOffset<StringId> string_ids_;
-  CollectionWithOffset<TypeId> type_ids_;
-  CollectionWithOffset<ProtoId> proto_ids_;
-  CollectionWithOffset<FieldId> field_ids_;
-  CollectionWithOffset<MethodId> method_ids_;
-  CollectionWithOffset<ClassDef> class_defs_;
+  CollectionVector<StringId> string_ids_;
+  CollectionVector<TypeId> type_ids_;
+  CollectionVector<ProtoId> proto_ids_;
+  CollectionVector<FieldId> field_ids_;
+  CollectionVector<MethodId> method_ids_;
+  CollectionVector<ClassDef> class_defs_;
 
-  CollectionWithOffset<StringData> string_datas_;
-  CollectionWithOffset<TypeList> type_lists_;
-  CollectionWithOffset<EncodedArrayItem> encoded_array_items_;
-  CollectionWithOffset<AnnotationItem> annotation_items_;
-  CollectionWithOffset<AnnotationSetItem> annotation_set_items_;
-  CollectionWithOffset<AnnotationSetRefList> annotation_set_ref_lists_;
-  CollectionWithOffset<AnnotationsDirectoryItem> annotations_directory_items_;
-  CollectionWithOffset<DebugInfoItem> debug_info_items_;
-  CollectionWithOffset<CodeItem> code_items_;
-  CollectionWithOffset<ClassData> class_datas_;
+  CollectionMap<StringData> string_datas_;
+  CollectionMap<TypeList> type_lists_;
+  CollectionMap<EncodedArrayItem> encoded_array_items_;
+  CollectionMap<AnnotationItem> annotation_items_;
+  CollectionMap<AnnotationSetItem> annotation_set_items_;
+  CollectionMap<AnnotationSetRefList> annotation_set_ref_lists_;
+  CollectionMap<AnnotationsDirectoryItem> annotations_directory_items_;
+  CollectionMap<DebugInfoItem> debug_info_items_;
+  CollectionMap<CodeItem> code_items_;
+  CollectionMap<ClassData> class_datas_;
 
-  uint32_t map_item_offset_ = 0;
+  uint32_t map_list_offset_ = 0;
 
   DISALLOW_COPY_AND_ASSIGN(Collections);
 };
@@ -539,20 +559,20 @@
 
 class MethodItem : public Item {
  public:
-  MethodItem(uint32_t access_flags, const MethodId* method_id, const CodeItem* code)
+  MethodItem(uint32_t access_flags, const MethodId* method_id, CodeItem* code)
       : access_flags_(access_flags), method_id_(method_id), code_(code) { }
   ~MethodItem() OVERRIDE { }
 
   uint32_t GetAccessFlags() const { return access_flags_; }
   const MethodId* GetMethodId() const { return method_id_; }
-  const CodeItem* GetCodeItem() const { return code_; }
+  CodeItem* GetCodeItem() { return code_; }  // Mutable pointer; may be nullptr.
 
   void Accept(AbstractDispatcher* dispatch) { dispatch->Dispatch(this); }
 
  private:
   uint32_t access_flags_;
   const MethodId* method_id_;
-  const CodeItem* code_;  // This can be nullptr.
+  CodeItem* code_;  // This can be nullptr.
 
   DISALLOW_COPY_AND_ASSIGN(MethodItem);
 };
diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc
index 68ff2a2..d0c5bf9 100644
--- a/dexlayout/dex_ir_builder.cc
+++ b/dexlayout/dex_ir_builder.cc
@@ -71,7 +71,7 @@
     collections.CreateClassDef(dex_file, i);
   }
   // MapItem.
-  collections.SetMapItemOffset(disk_header.map_off_);
+  collections.SetMapListOffset(disk_header.map_off_);
 
   CheckAndSetRemainingOffsets(dex_file, &collections);
 
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index bc9ca6d..02274b2 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -263,11 +263,13 @@
     DumpStringId(method_id->Name(), class_index);
   }
 
-  void DumpMethodItem(const dex_ir::MethodItem* method, const DexFile* dex_file, int class_index) {
-    if (profile_info_ != nullptr) {
+  void DumpMethodItem(dex_ir::MethodItem* method,
+                      const DexFile* dex_file,
+                      int class_index,
+                      ProfileCompilationInfo* profile_info) {
+    if (profile_info != nullptr) {
       uint32_t method_idx = method->GetMethodId()->GetIndex();
-      MethodReference mr(dex_file, method_idx);
-      if (!profile_info_->ContainsMethod(mr)) {
+      if (!profile_info->ContainsMethod(MethodReference(dex_file, method_idx))) {
         return;
       }
     }
@@ -344,13 +346,17 @@
  * Dumps a gnuplot data file showing the parts of the dex_file that belong to each class.
  * If profiling information is present, it dumps only those classes that are marked as hot.
  */
-void VisualizeDexLayout(dex_ir::Header* header, const DexFile* dex_file, size_t dex_file_index) {
+void VisualizeDexLayout(dex_ir::Header* header,
+                        const DexFile* dex_file,
+                        size_t dex_file_index,
+                        ProfileCompilationInfo* profile_info) {
   std::unique_ptr<Dumper> dumper(new Dumper(header->GetCollections(), dex_file_index));
 
   const uint32_t class_defs_size = header->GetCollections().ClassDefsSize();
   for (uint32_t class_index = 0; class_index < class_defs_size; class_index++) {
     dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(class_index);
-    if (profile_info_ != nullptr && !profile_info_->ContainsClass(*dex_file, class_index)) {
+    dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
+    if (profile_info != nullptr && !profile_info->ContainsClass(*dex_file, type_idx)) {
       continue;
     }
     dumper->DumpAddressRange(class_def, class_index);
@@ -383,12 +389,12 @@
       }
       if (class_data->DirectMethods()) {
         for (auto& method_item : *class_data->DirectMethods()) {
-          dumper->DumpMethodItem(method_item.get(), dex_file, class_index);
+          dumper->DumpMethodItem(method_item.get(), dex_file, class_index, profile_info);
         }
       }
       if (class_data->VirtualMethods()) {
         for (auto& method_item : *class_data->VirtualMethods()) {
-          dumper->DumpMethodItem(method_item.get(), dex_file, class_index);
+          dumper->DumpMethodItem(method_item.get(), dex_file, class_index, profile_info);
         }
       }
     }
diff --git a/dexlayout/dex_visualize.h b/dexlayout/dex_visualize.h
index b1d2ed7..09f8306 100644
--- a/dexlayout/dex_visualize.h
+++ b/dexlayout/dex_visualize.h
@@ -28,11 +28,15 @@
 namespace art {
 
 class DexFile;
+class ProfileCompilationInfo;
 namespace dex_ir {
 class Header;
 }  // namespace dex_ir
 
-void VisualizeDexLayout(dex_ir::Header* header, const DexFile* dex_file, size_t dex_file_index);
+void VisualizeDexLayout(dex_ir::Header* header,
+                        const DexFile* dex_file,
+                        size_t dex_file_index,
+                        ProfileCompilationInfo* profile_info);
 
 }  // namespace art
 
diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc
index dba5da0..7ffa38b 100644
--- a/dexlayout/dex_writer.cc
+++ b/dexlayout/dex_writer.cc
@@ -104,7 +104,9 @@
 }
 
 size_t DexWriter::Write(const void* buffer, size_t length, size_t offset) {
-  return dex_file_->PwriteFully(buffer, length, offset) ? length : 0;
+  DCHECK_LE(offset + length, mem_map_->Size());  // The copy must stay inside the map.
+  memcpy(mem_map_->Begin() + offset, buffer, length);
+  return length;  // Always reports every byte as written.
 }
 
 size_t DexWriter::WriteSleb128(uint32_t value, size_t offset) {
@@ -236,12 +238,13 @@
 
 void DexWriter::WriteStrings() {
   uint32_t string_data_off[1];
-  for (std::unique_ptr<dex_ir::StringId>& string_id : header_.GetCollections().StringIds()) {
+  for (std::unique_ptr<dex_ir::StringId>& string_id : header_->GetCollections().StringIds()) {
     string_data_off[0] = string_id->DataItem()->GetOffset();
     Write(string_data_off, string_id->GetSize(), string_id->GetOffset());
   }
 
-  for (std::unique_ptr<dex_ir::StringData>& string_data : header_.GetCollections().StringDatas()) {
+  for (auto& string_data_pair : header_->GetCollections().StringDatas()) {
+    std::unique_ptr<dex_ir::StringData>& string_data = string_data_pair.second;
     uint32_t offset = string_data->GetOffset();
     offset += WriteUleb128(CountModifiedUtf8Chars(string_data->Data()), offset);
     Write(string_data->Data(), strlen(string_data->Data()), offset);
@@ -250,7 +253,7 @@
 
 void DexWriter::WriteTypes() {
   uint32_t descriptor_idx[1];
-  for (std::unique_ptr<dex_ir::TypeId>& type_id : header_.GetCollections().TypeIds()) {
+  for (std::unique_ptr<dex_ir::TypeId>& type_id : header_->GetCollections().TypeIds()) {
     descriptor_idx[0] = type_id->GetStringId()->GetIndex();
     Write(descriptor_idx, type_id->GetSize(), type_id->GetOffset());
   }
@@ -259,7 +262,8 @@
 void DexWriter::WriteTypeLists() {
   uint32_t size[1];
   uint16_t list[1];
-  for (std::unique_ptr<dex_ir::TypeList>& type_list : header_.GetCollections().TypeLists()) {
+  for (auto& type_list_pair : header_->GetCollections().TypeLists()) {
+    std::unique_ptr<dex_ir::TypeList>& type_list = type_list_pair.second;
     size[0] = type_list->GetTypeList()->size();
     uint32_t offset = type_list->GetOffset();
     offset += Write(size, sizeof(uint32_t), offset);
@@ -272,7 +276,7 @@
 
 void DexWriter::WriteProtos() {
   uint32_t buffer[3];
-  for (std::unique_ptr<dex_ir::ProtoId>& proto_id : header_.GetCollections().ProtoIds()) {
+  for (std::unique_ptr<dex_ir::ProtoId>& proto_id : header_->GetCollections().ProtoIds()) {
     buffer[0] = proto_id->Shorty()->GetIndex();
     buffer[1] = proto_id->ReturnType()->GetIndex();
     buffer[2] = proto_id->Parameters() == nullptr ? 0 : proto_id->Parameters()->GetOffset();
@@ -282,7 +286,7 @@
 
 void DexWriter::WriteFields() {
   uint16_t buffer[4];
-  for (std::unique_ptr<dex_ir::FieldId>& field_id : header_.GetCollections().FieldIds()) {
+  for (std::unique_ptr<dex_ir::FieldId>& field_id : header_->GetCollections().FieldIds()) {
     buffer[0] = field_id->Class()->GetIndex();
     buffer[1] = field_id->Type()->GetIndex();
     buffer[2] = field_id->Name()->GetIndex();
@@ -293,7 +297,7 @@
 
 void DexWriter::WriteMethods() {
   uint16_t buffer[4];
-  for (std::unique_ptr<dex_ir::MethodId>& method_id : header_.GetCollections().MethodIds()) {
+  for (std::unique_ptr<dex_ir::MethodId>& method_id : header_->GetCollections().MethodIds()) {
     buffer[0] = method_id->Class()->GetIndex();
     buffer[1] = method_id->Proto()->GetIndex();
     buffer[2] = method_id->Name()->GetIndex();
@@ -303,16 +307,16 @@
 }
 
 void DexWriter::WriteEncodedArrays() {
-  for (std::unique_ptr<dex_ir::EncodedArrayItem>& encoded_array :
-      header_.GetCollections().EncodedArrayItems()) {
+  for (auto& encoded_array_pair : header_->GetCollections().EncodedArrayItems()) {
+    std::unique_ptr<dex_ir::EncodedArrayItem>& encoded_array = encoded_array_pair.second;
     WriteEncodedArray(encoded_array->GetEncodedValues(), encoded_array->GetOffset());
   }
 }
 
 void DexWriter::WriteAnnotations() {
   uint8_t visibility[1];
-  for (std::unique_ptr<dex_ir::AnnotationItem>& annotation :
-      header_.GetCollections().AnnotationItems()) {
+  for (auto& annotation_pair : header_->GetCollections().AnnotationItems()) {
+    std::unique_ptr<dex_ir::AnnotationItem>& annotation = annotation_pair.second;
     visibility[0] = annotation->GetVisibility();
     size_t offset = annotation->GetOffset();
     offset += Write(visibility, sizeof(uint8_t), offset);
@@ -323,8 +327,8 @@
 void DexWriter::WriteAnnotationSets() {
   uint32_t size[1];
   uint32_t annotation_off[1];
-  for (std::unique_ptr<dex_ir::AnnotationSetItem>& annotation_set :
-      header_.GetCollections().AnnotationSetItems()) {
+  for (auto& annotation_set_pair : header_->GetCollections().AnnotationSetItems()) {
+    std::unique_ptr<dex_ir::AnnotationSetItem>& annotation_set = annotation_set_pair.second;
     size[0] = annotation_set->GetItems()->size();
     size_t offset = annotation_set->GetOffset();
     offset += Write(size, sizeof(uint32_t), offset);
@@ -338,8 +342,8 @@
 void DexWriter::WriteAnnotationSetRefs() {
   uint32_t size[1];
   uint32_t annotations_off[1];
-  for (std::unique_ptr<dex_ir::AnnotationSetRefList>& annotation_set_ref :
-        header_.GetCollections().AnnotationSetRefLists()) {
+  for (auto& anno_set_ref_pair : header_->GetCollections().AnnotationSetRefLists()) {
+    std::unique_ptr<dex_ir::AnnotationSetRefList>& annotation_set_ref = anno_set_ref_pair.second;
     size[0] = annotation_set_ref->GetItems()->size();
     size_t offset = annotation_set_ref->GetOffset();
     offset += Write(size, sizeof(uint32_t), offset);
@@ -353,8 +357,9 @@
 void DexWriter::WriteAnnotationsDirectories() {
   uint32_t directory_buffer[4];
   uint32_t annotation_buffer[2];
-  for (std::unique_ptr<dex_ir::AnnotationsDirectoryItem>& annotations_directory :
-          header_.GetCollections().AnnotationsDirectoryItems()) {
+  for (auto& annotations_directory_pair : header_->GetCollections().AnnotationsDirectoryItems()) {
+    std::unique_ptr<dex_ir::AnnotationsDirectoryItem>& annotations_directory =
+        annotations_directory_pair.second;
     directory_buffer[0] = annotations_directory->GetClassAnnotation() == nullptr ? 0 :
         annotations_directory->GetClassAnnotation()->GetOffset();
     directory_buffer[1] = annotations_directory->GetFieldAnnotations() == nullptr ? 0 :
@@ -393,15 +398,17 @@
 }
 
 void DexWriter::WriteDebugInfoItems() {
-  for (std::unique_ptr<dex_ir::DebugInfoItem>& info : header_.GetCollections().DebugInfoItems()) {
-    Write(info->GetDebugInfo(), info->GetDebugInfoSize(), info->GetOffset());
+  for (auto& debug_info_pair : header_->GetCollections().DebugInfoItems()) {
+    std::unique_ptr<dex_ir::DebugInfoItem>& debug_info = debug_info_pair.second;
+    Write(debug_info->GetDebugInfo(), debug_info->GetDebugInfoSize(), debug_info->GetOffset());
   }
 }
 
 void DexWriter::WriteCodeItems() {
   uint16_t uint16_buffer[4];
   uint32_t uint32_buffer[2];
-  for (std::unique_ptr<dex_ir::CodeItem>& code_item : header_.GetCollections().CodeItems()) {
+  for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
+    std::unique_ptr<dex_ir::CodeItem>& code_item = code_item_pair.second;
     uint16_buffer[0] = code_item->RegistersSize();
     uint16_buffer[1] = code_item->InsSize();
     uint16_buffer[2] = code_item->OutsSize();
@@ -446,7 +453,7 @@
 
 void DexWriter::WriteClasses() {
   uint32_t class_def_buffer[8];
-  for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_.GetCollections().ClassDefs()) {
+  for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
     class_def_buffer[0] = class_def->ClassType()->GetIndex();
     class_def_buffer[1] = class_def->GetAccessFlags();
     class_def_buffer[2] = class_def->Superclass() == nullptr ? DexFile::kDexNoIndex :
@@ -464,7 +471,8 @@
     Write(class_def_buffer, class_def->GetSize(), offset);
   }
 
-  for (std::unique_ptr<dex_ir::ClassData>& class_data : header_.GetCollections().ClassDatas()) {
+  for (auto& class_data_pair : header_->GetCollections().ClassDatas()) {
+    std::unique_ptr<dex_ir::ClassData>& class_data = class_data_pair.second;
     size_t offset = class_data->GetOffset();
     offset += WriteUleb128(class_data->StaticFields()->size(), offset);
     offset += WriteUleb128(class_data->InstanceFields()->size(), offset);
@@ -491,7 +499,7 @@
 };
 
 void DexWriter::WriteMapItem() {
-  dex_ir::Collections& collection = header_.GetCollections();
+  dex_ir::Collections& collection = header_->GetCollections();
   std::priority_queue<MapItemContainer> queue;
 
   // Header and index section.
@@ -522,7 +530,7 @@
   }
 
   // Data section.
-  queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapItemOffset()));
+  queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapListOffset()));
   if (collection.TypeListsSize() != 0) {
     queue.push(MapItemContainer(DexFile::kDexTypeTypeList, collection.TypeListsSize(),
         collection.TypeListsOffset()));
@@ -564,7 +572,7 @@
         collection.AnnotationsDirectoryItemsSize(), collection.AnnotationsDirectoryItemsOffset()));
   }
 
-  uint32_t offset = collection.MapItemOffset();
+  uint32_t offset = collection.MapListOffset();
   uint16_t uint16_buffer[2];
   uint32_t uint32_buffer[2];
   uint16_buffer[1] = 0;
@@ -583,19 +591,19 @@
 
 void DexWriter::WriteHeader() {
   uint32_t buffer[20];
-  dex_ir::Collections& collections = header_.GetCollections();
+  dex_ir::Collections& collections = header_->GetCollections();
   size_t offset = 0;
-  offset += Write(header_.Magic(), 8 * sizeof(uint8_t), offset);
-  buffer[0] = header_.Checksum();
+  offset += Write(header_->Magic(), 8 * sizeof(uint8_t), offset);
+  buffer[0] = header_->Checksum();
   offset += Write(buffer, sizeof(uint32_t), offset);
-  offset += Write(header_.Signature(), 20 * sizeof(uint8_t), offset);
-  uint32_t file_size = header_.FileSize();
+  offset += Write(header_->Signature(), 20 * sizeof(uint8_t), offset);
+  uint32_t file_size = header_->FileSize();
   buffer[0] = file_size;
-  buffer[1] = header_.GetSize();
-  buffer[2] = header_.EndianTag();
-  buffer[3] = header_.LinkSize();
-  buffer[4] = header_.LinkOffset();
-  buffer[5] = collections.MapItemOffset();
+  buffer[1] = header_->GetSize();
+  buffer[2] = header_->EndianTag();
+  buffer[3] = header_->LinkSize();
+  buffer[4] = header_->LinkOffset();
+  buffer[5] = collections.MapListOffset();
   buffer[6] = collections.StringIdsSize();
   buffer[7] = collections.StringIdsOffset();
   buffer[8] = collections.TypeIdsSize();
@@ -617,12 +625,7 @@
   Write(buffer, 20 * sizeof(uint32_t), offset);
 }
 
-void DexWriter::WriteFile() {
-  if (dex_file_.get() == nullptr) {
-    fprintf(stderr, "Can't open output dex file\n");
-    return;
-  }
-
+void DexWriter::WriteMemMap() {
   WriteStrings();
   WriteTypes();
   WriteTypeLists();
@@ -641,8 +644,9 @@
   WriteHeader();
 }
 
-void DexWriter::OutputDexFile(dex_ir::Header& header, const char* file_name) {
-  (new DexWriter(header, file_name))->WriteFile();
+void DexWriter::Output(dex_ir::Header* header, MemMap* mem_map) {
+  DexWriter dex_writer(header, mem_map);  // Local object; destroyed on return.
+  dex_writer.WriteMemMap();
 }
 
 }  // namespace art
diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h
index 9104295..fb76e5c 100644
--- a/dexlayout/dex_writer.h
+++ b/dexlayout/dex_writer.h
@@ -21,19 +21,19 @@
 
 #include "base/unix_file/fd_file.h"
 #include "dex_ir.h"
+#include "mem_map.h"
 #include "os.h"
 
 namespace art {
 
 class DexWriter {
  public:
-  DexWriter(dex_ir::Header& header, const char* file_name) : header_(header),
-      dex_file_(OS::CreateEmptyFileWriteOnly(file_name)) { }
+  DexWriter(dex_ir::Header* header, MemMap* mem_map) : header_(header), mem_map_(mem_map) { }
 
-  static void OutputDexFile(dex_ir::Header& header, const char* file_name);
+  static void Output(dex_ir::Header* header, MemMap* mem_map);
 
  private:
-  void WriteFile();
+  void WriteMemMap();
 
   size_t Write(const void* buffer, size_t length, size_t offset);
   size_t WriteSleb128(uint32_t value, size_t offset);
@@ -62,13 +62,12 @@
   void WriteMapItem();
   void WriteHeader();
 
-  dex_ir::Header& header_;
-  std::unique_ptr<File> dex_file_;
+  dex_ir::Header* const header_;
+  MemMap* const mem_map_;
 
   DISALLOW_COPY_AND_ASSIGN(DexWriter);
 };
 
-
 }  // namespace art
 
 #endif  // ART_DEXLAYOUT_DEX_WRITER_H_
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 2b30a1b..cfe4837 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -37,27 +37,13 @@
 #include "dex_visualize.h"
 #include "dex_writer.h"
 #include "jit/offline_profiling_info.h"
+#include "mem_map.h"
 #include "os.h"
 #include "utils.h"
 
 namespace art {
 
 /*
- * Options parsed in main driver.
- */
-struct Options options_;
-
-/*
- * Output file. Defaults to stdout.
- */
-FILE* out_file_ = stdout;
-
-/*
- * Profile information file.
- */
-ProfileCompilationInfo* profile_info_ = nullptr;
-
-/*
  * Flags for use with createAccessFlagStr().
  */
 enum AccessFor {
@@ -301,420 +287,65 @@
 /*
  * Dumps a string value with some escape characters.
  */
-static void DumpEscapedString(const char* p) {
-  fputs("\"", out_file_);
+static void DumpEscapedString(const char* p, FILE* out_file) {  // |p| is read up to its NUL.
+  fputs("\"", out_file);  // Opening quote.
   for (; *p; p++) {
     switch (*p) {
       case '\\':
-        fputs("\\\\", out_file_);
+        fputs("\\\\", out_file);
         break;
       case '\"':
-        fputs("\\\"", out_file_);
+        fputs("\\\"", out_file);
         break;
       case '\t':
-        fputs("\\t", out_file_);
+        fputs("\\t", out_file);
         break;
       case '\n':
-        fputs("\\n", out_file_);
+        fputs("\\n", out_file);
         break;
       case '\r':
-        fputs("\\r", out_file_);
+        fputs("\\r", out_file);
         break;
       default:
-        putc(*p, out_file_);
+        putc(*p, out_file);  // Every other character is written unchanged.
     }  // switch
   }  // for
-  fputs("\"", out_file_);
+  fputs("\"", out_file);  // Closing quote.
 }
 
 /*
  * Dumps a string as an XML attribute value.
  */
-static void DumpXmlAttribute(const char* p) {
+static void DumpXmlAttribute(const char* p, FILE* out_file) {  // |p| is read up to its NUL.
   for (; *p; p++) {
     switch (*p) {
       case '&':
-        fputs("&amp;", out_file_);
+        fputs("&amp;", out_file);
         break;
       case '<':
-        fputs("&lt;", out_file_);
+        fputs("&lt;", out_file);
         break;
       case '>':
-        fputs("&gt;", out_file_);
+        fputs("&gt;", out_file);
         break;
       case '"':
-        fputs("&quot;", out_file_);
+        fputs("&quot;", out_file);
         break;
       case '\t':
-        fputs("&#x9;", out_file_);
+        fputs("&#x9;", out_file);  // Numeric character reference for TAB.
         break;
       case '\n':
-        fputs("&#xA;", out_file_);
+        fputs("&#xA;", out_file);  // Numeric character reference for LF.
         break;
       case '\r':
-        fputs("&#xD;", out_file_);
+        fputs("&#xD;", out_file);  // Numeric character reference for CR.
         break;
       default:
-        putc(*p, out_file_);
+        putc(*p, out_file);  // Written unchanged; note that "'" is not escaped.
     }  // switch
   }  // for
 }
 
-// Forward declare to resolve circular dependence.
-static void DumpEncodedValue(const dex_ir::EncodedValue* data);
-
-/*
- * Dumps encoded annotation.
- */
-static void DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
-  fputs(annotation->GetType()->GetStringId()->Data(), out_file_);
-  // Display all name=value pairs.
-  for (auto& subannotation : *annotation->GetAnnotationElements()) {
-    fputc(' ', out_file_);
-    fputs(subannotation->GetName()->Data(), out_file_);
-    fputc('=', out_file_);
-    DumpEncodedValue(subannotation->GetValue());
-  }
-}
-/*
- * Dumps encoded value.
- */
-static void DumpEncodedValue(const dex_ir::EncodedValue* data) {
-  switch (data->Type()) {
-    case DexFile::kDexAnnotationByte:
-      fprintf(out_file_, "%" PRId8, data->GetByte());
-      break;
-    case DexFile::kDexAnnotationShort:
-      fprintf(out_file_, "%" PRId16, data->GetShort());
-      break;
-    case DexFile::kDexAnnotationChar:
-      fprintf(out_file_, "%" PRIu16, data->GetChar());
-      break;
-    case DexFile::kDexAnnotationInt:
-      fprintf(out_file_, "%" PRId32, data->GetInt());
-      break;
-    case DexFile::kDexAnnotationLong:
-      fprintf(out_file_, "%" PRId64, data->GetLong());
-      break;
-    case DexFile::kDexAnnotationFloat: {
-      fprintf(out_file_, "%g", data->GetFloat());
-      break;
-    }
-    case DexFile::kDexAnnotationDouble: {
-      fprintf(out_file_, "%g", data->GetDouble());
-      break;
-    }
-    case DexFile::kDexAnnotationString: {
-      dex_ir::StringId* string_id = data->GetStringId();
-      if (options_.output_format_ == kOutputPlain) {
-        DumpEscapedString(string_id->Data());
-      } else {
-        DumpXmlAttribute(string_id->Data());
-      }
-      break;
-    }
-    case DexFile::kDexAnnotationType: {
-      dex_ir::TypeId* type_id = data->GetTypeId();
-      fputs(type_id->GetStringId()->Data(), out_file_);
-      break;
-    }
-    case DexFile::kDexAnnotationField:
-    case DexFile::kDexAnnotationEnum: {
-      dex_ir::FieldId* field_id = data->GetFieldId();
-      fputs(field_id->Name()->Data(), out_file_);
-      break;
-    }
-    case DexFile::kDexAnnotationMethod: {
-      dex_ir::MethodId* method_id = data->GetMethodId();
-      fputs(method_id->Name()->Data(), out_file_);
-      break;
-    }
-    case DexFile::kDexAnnotationArray: {
-      fputc('{', out_file_);
-      // Display all elements.
-      for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
-        fputc(' ', out_file_);
-        DumpEncodedValue(value.get());
-      }
-      fputs(" }", out_file_);
-      break;
-    }
-    case DexFile::kDexAnnotationAnnotation: {
-      DumpEncodedAnnotation(data->GetEncodedAnnotation());
-      break;
-    }
-    case DexFile::kDexAnnotationNull:
-      fputs("null", out_file_);
-      break;
-    case DexFile::kDexAnnotationBoolean:
-      fputs(StrBool(data->GetBoolean()), out_file_);
-      break;
-    default:
-      fputs("????", out_file_);
-      break;
-  }  // switch
-}
-
-/*
- * Dumps the file header.
- */
-static void DumpFileHeader(dex_ir::Header* header) {
-  char sanitized[8 * 2 + 1];
-  dex_ir::Collections& collections = header->GetCollections();
-  fprintf(out_file_, "DEX file header:\n");
-  Asciify(sanitized, header->Magic(), 8);
-  fprintf(out_file_, "magic               : '%s'\n", sanitized);
-  fprintf(out_file_, "checksum            : %08x\n", header->Checksum());
-  fprintf(out_file_, "signature           : %02x%02x...%02x%02x\n",
-          header->Signature()[0], header->Signature()[1],
-          header->Signature()[DexFile::kSha1DigestSize - 2],
-          header->Signature()[DexFile::kSha1DigestSize - 1]);
-  fprintf(out_file_, "file_size           : %d\n", header->FileSize());
-  fprintf(out_file_, "header_size         : %d\n", header->HeaderSize());
-  fprintf(out_file_, "link_size           : %d\n", header->LinkSize());
-  fprintf(out_file_, "link_off            : %d (0x%06x)\n",
-          header->LinkOffset(), header->LinkOffset());
-  fprintf(out_file_, "string_ids_size     : %d\n", collections.StringIdsSize());
-  fprintf(out_file_, "string_ids_off      : %d (0x%06x)\n",
-          collections.StringIdsOffset(), collections.StringIdsOffset());
-  fprintf(out_file_, "type_ids_size       : %d\n", collections.TypeIdsSize());
-  fprintf(out_file_, "type_ids_off        : %d (0x%06x)\n",
-          collections.TypeIdsOffset(), collections.TypeIdsOffset());
-  fprintf(out_file_, "proto_ids_size      : %d\n", collections.ProtoIdsSize());
-  fprintf(out_file_, "proto_ids_off       : %d (0x%06x)\n",
-          collections.ProtoIdsOffset(), collections.ProtoIdsOffset());
-  fprintf(out_file_, "field_ids_size      : %d\n", collections.FieldIdsSize());
-  fprintf(out_file_, "field_ids_off       : %d (0x%06x)\n",
-          collections.FieldIdsOffset(), collections.FieldIdsOffset());
-  fprintf(out_file_, "method_ids_size     : %d\n", collections.MethodIdsSize());
-  fprintf(out_file_, "method_ids_off      : %d (0x%06x)\n",
-          collections.MethodIdsOffset(), collections.MethodIdsOffset());
-  fprintf(out_file_, "class_defs_size     : %d\n", collections.ClassDefsSize());
-  fprintf(out_file_, "class_defs_off      : %d (0x%06x)\n",
-          collections.ClassDefsOffset(), collections.ClassDefsOffset());
-  fprintf(out_file_, "data_size           : %d\n", header->DataSize());
-  fprintf(out_file_, "data_off            : %d (0x%06x)\n\n",
-          header->DataOffset(), header->DataOffset());
-}
-
-/*
- * Dumps a class_def_item.
- */
-static void DumpClassDef(dex_ir::Header* header, int idx) {
-  // General class information.
-  dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(idx);
-  fprintf(out_file_, "Class #%d header:\n", idx);
-  fprintf(out_file_, "class_idx           : %d\n", class_def->ClassType()->GetIndex());
-  fprintf(out_file_, "access_flags        : %d (0x%04x)\n",
-          class_def->GetAccessFlags(), class_def->GetAccessFlags());
-  uint32_t superclass_idx =  class_def->Superclass() == nullptr ?
-      DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
-  fprintf(out_file_, "superclass_idx      : %d\n", superclass_idx);
-  fprintf(out_file_, "interfaces_off      : %d (0x%06x)\n",
-          class_def->InterfacesOffset(), class_def->InterfacesOffset());
-  uint32_t source_file_offset = 0xffffffffU;
-  if (class_def->SourceFile() != nullptr) {
-    source_file_offset = class_def->SourceFile()->GetIndex();
-  }
-  fprintf(out_file_, "source_file_idx     : %d\n", source_file_offset);
-  uint32_t annotations_offset = 0;
-  if (class_def->Annotations() != nullptr) {
-    annotations_offset = class_def->Annotations()->GetOffset();
-  }
-  fprintf(out_file_, "annotations_off     : %d (0x%06x)\n",
-          annotations_offset, annotations_offset);
-  if (class_def->GetClassData() == nullptr) {
-    fprintf(out_file_, "class_data_off      : %d (0x%06x)\n", 0, 0);
-  } else {
-    fprintf(out_file_, "class_data_off      : %d (0x%06x)\n",
-            class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
-  }
-
-  // Fields and methods.
-  dex_ir::ClassData* class_data = class_def->GetClassData();
-  if (class_data != nullptr && class_data->StaticFields() != nullptr) {
-    fprintf(out_file_, "static_fields_size  : %zu\n", class_data->StaticFields()->size());
-  } else {
-    fprintf(out_file_, "static_fields_size  : 0\n");
-  }
-  if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
-    fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
-  } else {
-    fprintf(out_file_, "instance_fields_size: 0\n");
-  }
-  if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
-    fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
-  } else {
-    fprintf(out_file_, "direct_methods_size : 0\n");
-  }
-  if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
-    fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
-  } else {
-    fprintf(out_file_, "virtual_methods_size: 0\n");
-  }
-  fprintf(out_file_, "\n");
-}
-
-/**
- * Dumps an annotation set item.
- */
-static void DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
-  if (set_item == nullptr || set_item->GetItems()->size() == 0) {
-    fputs("  empty-annotation-set\n", out_file_);
-    return;
-  }
-  for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
-    if (annotation == nullptr) {
-      continue;
-    }
-    fputs("  ", out_file_);
-    switch (annotation->GetVisibility()) {
-      case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   out_file_); break;
-      case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
-      case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  out_file_); break;
-      default:                             fputs("VISIBILITY_UNKNOWN ", out_file_); break;
-    }  // switch
-    DumpEncodedAnnotation(annotation->GetAnnotation());
-    fputc('\n', out_file_);
-  }
-}
-
-/*
- * Dumps class annotations.
- */
-static void DumpClassAnnotations(dex_ir::Header* header, int idx) {
-  dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(idx);
-  dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
-  if (annotations_directory == nullptr) {
-    return;  // none
-  }
-
-  fprintf(out_file_, "Class #%d annotations:\n", idx);
-
-  dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
-  dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
-  dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
-  dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
-
-  // Annotations on the class itself.
-  if (class_set_item != nullptr) {
-    fprintf(out_file_, "Annotations on class\n");
-    DumpAnnotationSetItem(class_set_item);
-  }
-
-  // Annotations on fields.
-  if (fields != nullptr) {
-    for (auto& field : *fields) {
-      const dex_ir::FieldId* field_id = field->GetFieldId();
-      const uint32_t field_idx = field_id->GetIndex();
-      const char* field_name = field_id->Name()->Data();
-      fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
-      DumpAnnotationSetItem(field->GetAnnotationSetItem());
-    }
-  }
-
-  // Annotations on methods.
-  if (methods != nullptr) {
-    for (auto& method : *methods) {
-      const dex_ir::MethodId* method_id = method->GetMethodId();
-      const uint32_t method_idx = method_id->GetIndex();
-      const char* method_name = method_id->Name()->Data();
-      fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
-      DumpAnnotationSetItem(method->GetAnnotationSetItem());
-    }
-  }
-
-  // Annotations on method parameters.
-  if (parameters != nullptr) {
-    for (auto& parameter : *parameters) {
-      const dex_ir::MethodId* method_id = parameter->GetMethodId();
-      const uint32_t method_idx = method_id->GetIndex();
-      const char* method_name = method_id->Name()->Data();
-      fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
-      uint32_t j = 0;
-      for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
-        fprintf(out_file_, "#%u\n", j);
-        DumpAnnotationSetItem(annotation);
-        ++j;
-      }
-    }
-  }
-
-  fputc('\n', out_file_);
-}
-
-/*
- * Dumps an interface that a class declares to implement.
- */
-static void DumpInterface(const dex_ir::TypeId* type_item, int i) {
-  const char* interface_name = type_item->GetStringId()->Data();
-  if (options_.output_format_ == kOutputPlain) {
-    fprintf(out_file_, "    #%d              : '%s'\n", i, interface_name);
-  } else {
-    std::string dot(DescriptorToDotWrapper(interface_name));
-    fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dot.c_str());
-  }
-}
-
-/*
- * Dumps the catches table associated with the code.
- */
-static void DumpCatches(const dex_ir::CodeItem* code) {
-  const uint16_t tries_size = code->TriesSize();
-
-  // No catch table.
-  if (tries_size == 0) {
-    fprintf(out_file_, "      catches       : (none)\n");
-    return;
-  }
-
-  // Dump all table entries.
-  fprintf(out_file_, "      catches       : %d\n", tries_size);
-  std::vector<std::unique_ptr<const dex_ir::TryItem>>* tries = code->Tries();
-  for (uint32_t i = 0; i < tries_size; i++) {
-    const dex_ir::TryItem* try_item = (*tries)[i].get();
-    const uint32_t start = try_item->StartAddr();
-    const uint32_t end = start + try_item->InsnCount();
-    fprintf(out_file_, "        0x%04x - 0x%04x\n", start, end);
-    for (auto& handler : *try_item->GetHandlers()->GetHandlers()) {
-      const dex_ir::TypeId* type_id = handler->GetTypeId();
-      const char* descriptor = (type_id == nullptr) ? "<any>" : type_id->GetStringId()->Data();
-      fprintf(out_file_, "          %s -> 0x%04x\n", descriptor, handler->GetAddress());
-    }  // for
-  }  // for
-}
-
-/*
- * Dumps all positions table entries associated with the code.
- */
-static void DumpPositionInfo(const dex_ir::CodeItem* code) {
-  dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
-  if (debug_info == nullptr) {
-    return;
-  }
-  std::vector<std::unique_ptr<dex_ir::PositionInfo>>& positions = debug_info->GetPositionInfo();
-  for (size_t i = 0; i < positions.size(); ++i) {
-    fprintf(out_file_, "        0x%04x line=%d\n", positions[i]->address_, positions[i]->line_);
-  }
-}
-
-/*
- * Dumps all locals table entries associated with the code.
- */
-static void DumpLocalInfo(const dex_ir::CodeItem* code) {
-  dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
-  if (debug_info == nullptr) {
-    return;
-  }
-  std::vector<std::unique_ptr<dex_ir::LocalInfo>>& locals = debug_info->GetLocalInfo();
-  for (size_t i = 0; i < locals.size(); ++i) {
-    dex_ir::LocalInfo* entry = locals[i].get();
-    fprintf(out_file_, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
-            entry->start_address_, entry->end_address_, entry->reg_,
-            entry->name_.c_str(), entry->descriptor_.c_str(), entry->signature_.c_str());
-  }
-}
-
 /*
  * Helper for dumpInstruction(), which builds the string
  * representation for the index in the given instruction.
@@ -723,11 +354,10 @@
 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
                                            const Instruction* dec_insn,
                                            size_t buf_size) {
-  static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
   std::unique_ptr<char[]> buf(new char[buf_size]);
   // Determine index and width of the string.
   uint32_t index = 0;
-  uint32_t secondary_index = kInvalidIndex;
+  uint32_t secondary_index = DexFile::kDexNoIndex;
   uint32_t width = 4;
   switch (Instruction::FormatOf(dec_insn->Opcode())) {
     // SOME NOT SUPPORTED:
@@ -756,7 +386,6 @@
       index = dec_insn->VRegB();
       secondary_index = dec_insn->VRegH();
       width = 4;
-      break;
     default:
       break;
   }  // switch
@@ -821,9 +450,6 @@
     case Instruction::kIndexFieldOffset:
       outSize = snprintf(buf.get(), buf_size, "[obj+%0*x]", width, index);
       break;
-    // SOME NOT SUPPORTED:
-    // case Instruction::kIndexVaries:
-    // case Instruction::kIndexInlineMethod:
     case Instruction::kIndexMethodAndProtoRef: {
       std::string method("<method?>");
       std::string proto("<proto?>");
@@ -840,8 +466,11 @@
       }
       outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
                          method.c_str(), proto.c_str(), width, index, width, secondary_index);
-      }
-      break;
+    }
+    break;
+    // SOME NOT SUPPORTED:
+    // case Instruction::kIndexVaries:
+    // case Instruction::kIndexInlineMethod:
     default:
       outSize = snprintf(buf.get(), buf_size, "<?>");
       break;
@@ -858,11 +487,365 @@
 }
 
 /*
+ * Dumps an encoded annotation: its type string followed by " name=value" pairs.
+ */
+void DexLayout::DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation) {
+  const char* type_string = annotation->GetType()->GetStringId()->Data();
+  fputs(type_string, out_file_);
+  // Display all name=value pairs.
+  for (auto& element : *annotation->GetAnnotationElements()) {
+    // The " name=" prefix goes out in one call; the value has its own dumper.
+    fprintf(out_file_, " %s=", element->GetName()->Data());
+    DumpEncodedValue(element->GetValue());
+  }
+}
+/*
+ * Dumps one encoded value; arrays and nested annotations are dumped recursively.
+ */
+void DexLayout::DumpEncodedValue(const dex_ir::EncodedValue* data) {
+  switch (data->Type()) {
+    case DexFile::kDexAnnotationByte:
+      fprintf(out_file_, "%" PRId8, data->GetByte());
+      break;
+    case DexFile::kDexAnnotationShort:
+      fprintf(out_file_, "%" PRId16, data->GetShort());
+      break;
+    case DexFile::kDexAnnotationChar:
+      fprintf(out_file_, "%" PRIu16, data->GetChar());  // Printed as an unsigned number.
+      break;
+    case DexFile::kDexAnnotationInt:
+      fprintf(out_file_, "%" PRId32, data->GetInt());
+      break;
+    case DexFile::kDexAnnotationLong:
+      fprintf(out_file_, "%" PRId64, data->GetLong());
+      break;
+    case DexFile::kDexAnnotationFloat: {
+      fprintf(out_file_, "%g", data->GetFloat());
+      break;
+    }
+    case DexFile::kDexAnnotationDouble: {
+      fprintf(out_file_, "%g", data->GetDouble());
+      break;
+    }
+    case DexFile::kDexAnnotationString: {
+      dex_ir::StringId* string_id = data->GetStringId();
+      if (options_.output_format_ == kOutputPlain) {  // Plain: quoted, backslash-escaped.
+        DumpEscapedString(string_id->Data(), out_file_);
+      } else {  // Any other format: XML attribute escaping.
+        DumpXmlAttribute(string_id->Data(), out_file_);
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationType: {
+      dex_ir::TypeId* type_id = data->GetTypeId();
+      fputs(type_id->GetStringId()->Data(), out_file_);
+      break;
+    }
+    case DexFile::kDexAnnotationField:
+    case DexFile::kDexAnnotationEnum: {  // Both cases print only the field's name.
+      dex_ir::FieldId* field_id = data->GetFieldId();
+      fputs(field_id->Name()->Data(), out_file_);
+      break;
+    }
+    case DexFile::kDexAnnotationMethod: {  // Only the method's name is printed.
+      dex_ir::MethodId* method_id = data->GetMethodId();
+      fputs(method_id->Name()->Data(), out_file_);
+      break;
+    }
+    case DexFile::kDexAnnotationArray: {
+      fputc('{', out_file_);
+      // Display all elements, each preceded by a space, giving "{ e0 e1 ... }".
+      for (auto& value : *data->GetEncodedArray()->GetEncodedValues()) {
+        fputc(' ', out_file_);
+        DumpEncodedValue(value.get());
+      }
+      fputs(" }", out_file_);
+      break;
+    }
+    case DexFile::kDexAnnotationAnnotation: {
+      DumpEncodedAnnotation(data->GetEncodedAnnotation());
+      break;
+    }
+    case DexFile::kDexAnnotationNull:
+      fputs("null", out_file_);
+      break;
+    case DexFile::kDexAnnotationBoolean:
+      fputs(StrBool(data->GetBoolean()), out_file_);
+      break;
+    default:
+      fputs("????", out_file_);  // Value type not handled by any case above.
+      break;
+  }  // switch
+}
+
+/*
+ * Dumps the fields of header_ plus the sizes and offsets of its id and class-def collections.
+ */
+void DexLayout::DumpFileHeader() {
+  char sanitized[8 * 2 + 1];  // NOTE(review): presumably worst-case Asciify() output; confirm.
+  dex_ir::Collections& collections = header_->GetCollections();
+  fprintf(out_file_, "DEX file header:\n");
+  Asciify(sanitized, header_->Magic(), 8);
+  fprintf(out_file_, "magic               : '%s'\n", sanitized);
+  fprintf(out_file_, "checksum            : %08x\n", header_->Checksum());
+  fprintf(out_file_, "signature           : %02x%02x...%02x%02x\n",
+          header_->Signature()[0], header_->Signature()[1],  // First two bytes...
+          header_->Signature()[DexFile::kSha1DigestSize - 2],  // ...and the last two.
+          header_->Signature()[DexFile::kSha1DigestSize - 1]);
+  fprintf(out_file_, "file_size           : %d\n", header_->FileSize());
+  fprintf(out_file_, "header_size         : %d\n", header_->HeaderSize());
+  fprintf(out_file_, "link_size           : %d\n", header_->LinkSize());
+  fprintf(out_file_, "link_off            : %d (0x%06x)\n",
+          header_->LinkOffset(), header_->LinkOffset());
+  fprintf(out_file_, "string_ids_size     : %d\n", collections.StringIdsSize());
+  fprintf(out_file_, "string_ids_off      : %d (0x%06x)\n",
+          collections.StringIdsOffset(), collections.StringIdsOffset());
+  fprintf(out_file_, "type_ids_size       : %d\n", collections.TypeIdsSize());
+  fprintf(out_file_, "type_ids_off        : %d (0x%06x)\n",
+          collections.TypeIdsOffset(), collections.TypeIdsOffset());
+  fprintf(out_file_, "proto_ids_size      : %d\n", collections.ProtoIdsSize());
+  fprintf(out_file_, "proto_ids_off       : %d (0x%06x)\n",
+          collections.ProtoIdsOffset(), collections.ProtoIdsOffset());
+  fprintf(out_file_, "field_ids_size      : %d\n", collections.FieldIdsSize());
+  fprintf(out_file_, "field_ids_off       : %d (0x%06x)\n",
+          collections.FieldIdsOffset(), collections.FieldIdsOffset());
+  fprintf(out_file_, "method_ids_size     : %d\n", collections.MethodIdsSize());
+  fprintf(out_file_, "method_ids_off      : %d (0x%06x)\n",
+          collections.MethodIdsOffset(), collections.MethodIdsOffset());
+  fprintf(out_file_, "class_defs_size     : %d\n", collections.ClassDefsSize());
+  fprintf(out_file_, "class_defs_off      : %d (0x%06x)\n",
+          collections.ClassDefsOffset(), collections.ClassDefsOffset());
+  fprintf(out_file_, "data_size           : %d\n", header_->DataSize());
+  fprintf(out_file_, "data_off            : %d (0x%06x)\n\n",
+          header_->DataOffset(), header_->DataOffset());
+}
+
+/*
+ * Dumps the class_def_item at index idx, followed by the member counts of its class data.
+ */
+void DexLayout::DumpClassDef(int idx) {
+  // General class information.
+  dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
+  fprintf(out_file_, "Class #%d header:\n", idx);
+  fprintf(out_file_, "class_idx           : %d\n", class_def->ClassType()->GetIndex());
+  fprintf(out_file_, "access_flags        : %d (0x%04x)\n",
+          class_def->GetAccessFlags(), class_def->GetAccessFlags());
+  uint32_t superclass_idx =  class_def->Superclass() == nullptr ?
+      DexFile::kDexNoIndex16 : class_def->Superclass()->GetIndex();
+  fprintf(out_file_, "superclass_idx      : %d\n", superclass_idx);
+  fprintf(out_file_, "interfaces_off      : %d (0x%06x)\n",
+          class_def->InterfacesOffset(), class_def->InterfacesOffset());
+  uint32_t source_file_offset = 0xffffffffU;  // Holds an index; all ones = no source file.
+  if (class_def->SourceFile() != nullptr) {
+    source_file_offset = class_def->SourceFile()->GetIndex();
+  }
+  fprintf(out_file_, "source_file_idx     : %d\n", source_file_offset);
+  uint32_t annotations_offset = 0;  // Stays 0 when the class has no annotations directory.
+  if (class_def->Annotations() != nullptr) {
+    annotations_offset = class_def->Annotations()->GetOffset();
+  }
+  fprintf(out_file_, "annotations_off     : %d (0x%06x)\n",
+          annotations_offset, annotations_offset);
+  if (class_def->GetClassData() == nullptr) {
+    fprintf(out_file_, "class_data_off      : %d (0x%06x)\n", 0, 0);  // No class data.
+  } else {
+    fprintf(out_file_, "class_data_off      : %d (0x%06x)\n",
+            class_def->GetClassData()->GetOffset(), class_def->GetClassData()->GetOffset());
+  }
+
+  // Fields and methods; a missing class data or member list is reported as size 0.
+  dex_ir::ClassData* class_data = class_def->GetClassData();
+  if (class_data != nullptr && class_data->StaticFields() != nullptr) {
+    fprintf(out_file_, "static_fields_size  : %zu\n", class_data->StaticFields()->size());
+  } else {
+    fprintf(out_file_, "static_fields_size  : 0\n");
+  }
+  if (class_data != nullptr && class_data->InstanceFields() != nullptr) {
+    fprintf(out_file_, "instance_fields_size: %zu\n", class_data->InstanceFields()->size());
+  } else {
+    fprintf(out_file_, "instance_fields_size: 0\n");
+  }
+  if (class_data != nullptr && class_data->DirectMethods() != nullptr) {
+    fprintf(out_file_, "direct_methods_size : %zu\n", class_data->DirectMethods()->size());
+  } else {
+    fprintf(out_file_, "direct_methods_size : 0\n");
+  }
+  if (class_data != nullptr && class_data->VirtualMethods() != nullptr) {
+    fprintf(out_file_, "virtual_methods_size: %zu\n", class_data->VirtualMethods()->size());
+  } else {
+    fprintf(out_file_, "virtual_methods_size: 0\n");
+  }
+  fprintf(out_file_, "\n");
+}
+
+/*
+ * Dumps each annotation of a set on its own line, prefixed by its visibility.
+ */
+void DexLayout::DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item) {
+  if (set_item == nullptr || set_item->GetItems()->size() == 0) {
+    fputs("  empty-annotation-set\n", out_file_);  // Null and empty sets print the same.
+    return;
+  }
+  for (dex_ir::AnnotationItem* annotation : *set_item->GetItems()) {
+    if (annotation == nullptr) {
+      continue;  // Null entries produce no output.
+    }
+    fputs("  ", out_file_);
+    switch (annotation->GetVisibility()) {
+      case DexFile::kDexVisibilityBuild:   fputs("VISIBILITY_BUILD ",   out_file_); break;
+      case DexFile::kDexVisibilityRuntime: fputs("VISIBILITY_RUNTIME ", out_file_); break;
+      case DexFile::kDexVisibilitySystem:  fputs("VISIBILITY_SYSTEM ",  out_file_); break;
+      default:                             fputs("VISIBILITY_UNKNOWN ", out_file_); break;
+    }  // switch
+    DumpEncodedAnnotation(annotation->GetAnnotation());
+    fputc('\n', out_file_);
+  }
+}
+
+/*
+ * Dumps the class, field, method and parameter annotations of class def #idx, if it has any.
+ */
+void DexLayout::DumpClassAnnotations(int idx) {
+  dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
+  dex_ir::AnnotationsDirectoryItem* annotations_directory = class_def->Annotations();
+  if (annotations_directory == nullptr) {
+    return;  // No annotations directory: nothing is printed, not even the heading.
+  }
+
+  fprintf(out_file_, "Class #%d annotations:\n", idx);
+
+  dex_ir::AnnotationSetItem* class_set_item = annotations_directory->GetClassAnnotation();
+  dex_ir::FieldAnnotationVector* fields = annotations_directory->GetFieldAnnotations();
+  dex_ir::MethodAnnotationVector* methods = annotations_directory->GetMethodAnnotations();
+  dex_ir::ParameterAnnotationVector* parameters = annotations_directory->GetParameterAnnotations();
+
+  // Annotations on the class itself.
+  if (class_set_item != nullptr) {
+    fprintf(out_file_, "Annotations on class\n");
+    DumpAnnotationSetItem(class_set_item);
+  }
+
+  // Annotations on fields, each introduced by the field's index and name.
+  if (fields != nullptr) {
+    for (auto& field : *fields) {
+      const dex_ir::FieldId* field_id = field->GetFieldId();
+      const uint32_t field_idx = field_id->GetIndex();
+      const char* field_name = field_id->Name()->Data();
+      fprintf(out_file_, "Annotations on field #%u '%s'\n", field_idx, field_name);
+      DumpAnnotationSetItem(field->GetAnnotationSetItem());
+    }
+  }
+
+  // Annotations on methods, each introduced by the method's index and name.
+  if (methods != nullptr) {
+    for (auto& method : *methods) {
+      const dex_ir::MethodId* method_id = method->GetMethodId();
+      const uint32_t method_idx = method_id->GetIndex();
+      const char* method_name = method_id->Name()->Data();
+      fprintf(out_file_, "Annotations on method #%u '%s'\n", method_idx, method_name);
+      DumpAnnotationSetItem(method->GetAnnotationSetItem());
+    }
+  }
+
+  // Annotations on method parameters: one numbered annotation set per list entry.
+  if (parameters != nullptr) {
+    for (auto& parameter : *parameters) {
+      const dex_ir::MethodId* method_id = parameter->GetMethodId();
+      const uint32_t method_idx = method_id->GetIndex();
+      const char* method_name = method_id->Name()->Data();
+      fprintf(out_file_, "Annotations on method #%u '%s' parameters\n", method_idx, method_name);
+      uint32_t j = 0;  // Position of the current set within this method's list.
+      for (dex_ir::AnnotationSetItem* annotation : *parameter->GetAnnotations()->GetItems()) {
+        fprintf(out_file_, "#%u\n", j);
+        DumpAnnotationSetItem(annotation);
+        ++j;
+      }
+    }
+  }
+
+  fputc('\n', out_file_);
+}
+
+/*
+ * Prints one interface implemented by a class: an indexed descriptor line in
+ * plain mode, or an <implements> element with the dotted name otherwise.
+ */
+void DexLayout::DumpInterface(const dex_ir::TypeId* type_item, int i) {
+  const char* descriptor = type_item->GetStringId()->Data();
+  if (options_.output_format_ != kOutputPlain) {
+    const std::string dotted(DescriptorToDotWrapper(descriptor));
+    fprintf(out_file_, "<implements name=\"%s\">\n</implements>\n", dotted.c_str());
+    return;
+  }
+  fprintf(out_file_, "    #%d              : '%s'\n", i, descriptor);
+}
+
+/*
+ * Prints the try ranges of a code item together with the handlers of each
+ * range, or "(none)" when the code item has no tries.
+ */
+void DexLayout::DumpCatches(const dex_ir::CodeItem* code) {
+  const uint16_t tries_size = code->TriesSize();
+  if (tries_size == 0) {
+    fprintf(out_file_, "      catches       : (none)\n");
+    return;
+  }
+
+  fprintf(out_file_, "      catches       : %d\n", tries_size);
+  std::vector<std::unique_ptr<const dex_ir::TryItem>>& try_table = *code->Tries();
+  for (uint32_t index = 0; index != tries_size; ++index) {
+    const dex_ir::TryItem* current = try_table[index].get();
+    const uint32_t first = current->StartAddr();
+    const uint32_t limit = first + current->InsnCount();
+    fprintf(out_file_, "        0x%04x - 0x%04x\n", first, limit);
+    for (auto& handler : *current->GetHandlers()->GetHandlers()) {
+      const dex_ir::TypeId* caught = handler->GetTypeId();
+      // A handler without a type id is printed as "<any>".
+      const char* descriptor = "<any>";
+      if (caught != nullptr) {
+        descriptor = caught->GetStringId()->Data();
+      }
+      fprintf(out_file_, "          %s -> 0x%04x\n", descriptor, handler->GetAddress());
+    }  // for
+  }  // for
+}
+
+/*
+ * Prints one "address line" pair per position entry of the code item's debug
+ * info. Does nothing when the code item carries no debug info.
+ */
+void DexLayout::DumpPositionInfo(const dex_ir::CodeItem* code) {
+  dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
+  if (debug_info != nullptr) {
+    for (const std::unique_ptr<dex_ir::PositionInfo>& position : debug_info->GetPositionInfo()) {
+      fprintf(out_file_, "        0x%04x line=%d\n", position->address_, position->line_);
+    }
+  }
+}
+
+/*
+ * Prints one line per local-variable entry of the code item's debug info:
+ * address range, register, name, descriptor and signature. Does nothing when
+ * the code item carries no debug info.
+ */
+void DexLayout::DumpLocalInfo(const dex_ir::CodeItem* code) {
+  dex_ir::DebugInfoItem* debug_info = code->DebugInfo();
+  if (debug_info != nullptr) {
+    for (const std::unique_ptr<dex_ir::LocalInfo>& local : debug_info->GetLocalInfo()) {
+      fprintf(out_file_, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
+              local->start_address_, local->end_address_, local->reg_,
+              local->name_.c_str(), local->descriptor_.c_str(), local->signature_.c_str());
+    }
+  }
+}
+
+/*
  * Dumps a single instruction.
  */
-static void DumpInstruction(dex_ir::Header* header, const dex_ir::CodeItem* code,
-                            uint32_t code_offset, uint32_t insn_idx, uint32_t insn_width,
-                            const Instruction* dec_insn) {
+void DexLayout::DumpInstruction(const dex_ir::CodeItem* code,
+                                uint32_t code_offset,
+                                uint32_t insn_idx,
+                                uint32_t insn_width,
+                                const Instruction* dec_insn) {
   // Address of instruction (expressed as byte offset).
   fprintf(out_file_, "%06x:", code_offset + 0x10 + insn_idx * 2);
 
@@ -901,7 +884,7 @@
   // Set up additional argument.
   std::unique_ptr<char[]> index_buf;
   if (Instruction::IndexTypeOf(dec_insn->Opcode()) != Instruction::kIndexNone) {
-    index_buf = IndexString(header, dec_insn, 200);
+    index_buf = IndexString(header_, dec_insn, 200);
   }
 
   // Dump the instruction.
@@ -1073,9 +1056,8 @@
 /*
  * Dumps a bytecode disassembly.
  */
-static void DumpBytecodes(dex_ir::Header* header, uint32_t idx,
-                          const dex_ir::CodeItem* code, uint32_t code_offset) {
-  dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(idx);
+void DexLayout::DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
+  dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
   const char* name = method_id->Name()->Data();
   std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
@@ -1094,7 +1076,7 @@
       fprintf(stderr, "GLITCH: zero-width instruction at idx=0x%04x\n", insn_idx);
       break;
     }
-    DumpInstruction(header, code, code_offset, insn_idx, insn_width, instruction);
+    DumpInstruction(code, code_offset, insn_idx, insn_width, instruction);
     insn_idx += insn_width;
   }  // for
 }
@@ -1102,8 +1084,7 @@
 /*
  * Dumps code of a method.
  */
-static void DumpCode(dex_ir::Header* header, uint32_t idx, const dex_ir::CodeItem* code,
-                     uint32_t code_offset) {
+void DexLayout::DumpCode(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset) {
   fprintf(out_file_, "      registers     : %d\n", code->RegistersSize());
   fprintf(out_file_, "      ins           : %d\n", code->InsSize());
   fprintf(out_file_, "      outs          : %d\n", code->OutsSize());
@@ -1112,7 +1093,7 @@
 
   // Bytecode disassembly, if requested.
   if (options_.disassemble_) {
-    DumpBytecodes(header, idx, code, code_offset);
+    DumpBytecodes(idx, code, code_offset);
   }
 
   // Try-catch blocks.
@@ -1128,14 +1109,13 @@
 /*
  * Dumps a method.
  */
-static void DumpMethod(dex_ir::Header* header, uint32_t idx, uint32_t flags,
-                       const dex_ir::CodeItem* code, int i) {
+void DexLayout::DumpMethod(uint32_t idx, uint32_t flags, const dex_ir::CodeItem* code, int i) {
   // Bail for anything private if export only requested.
   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
     return;
   }
 
-  dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(idx);
+  dex_ir::MethodId* method_id = header_->GetCollections().GetMethodId(idx);
   const char* name = method_id->Name()->Data();
   char* type_descriptor = strdup(GetSignatureForProtoId(method_id->Proto()).c_str());
   const char* back_descriptor = method_id->Class()->GetStringId()->Data();
@@ -1150,7 +1130,7 @@
       fprintf(out_file_, "      code          : (none)\n");
     } else {
       fprintf(out_file_, "      code          -\n");
-      DumpCode(header, idx, code, code->GetOffset());
+      DumpCode(idx, code, code->GetOffset());
     }
     if (options_.disassemble_) {
       fputc('\n', out_file_);
@@ -1233,14 +1213,13 @@
 /*
  * Dumps a static (class) field.
  */
-static void DumpSField(dex_ir::Header* header, uint32_t idx, uint32_t flags,
-                       int i, dex_ir::EncodedValue* init) {
+void DexLayout::DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init) {
   // Bail for anything private if export only requested.
   if (options_.exports_only_ && (flags & (kAccPublic | kAccProtected)) == 0) {
     return;
   }
 
-  dex_ir::FieldId* field_id = header->GetCollections().GetFieldId(idx);
+  dex_ir::FieldId* field_id = header_->GetCollections().GetFieldId(idx);
   const char* name = field_id->Name()->Data();
   const char* type_descriptor = field_id->Type()->GetStringId()->Data();
   const char* back_descriptor = field_id->Class()->GetStringId()->Data();
@@ -1281,8 +1260,8 @@
 /*
  * Dumps an instance field.
  */
-static void DumpIField(dex_ir::Header* header, uint32_t idx, uint32_t flags, int i) {
-  DumpSField(header, idx, flags, i, nullptr);
+void DexLayout::DumpIField(uint32_t idx, uint32_t flags, int i) {
+  DumpSField(idx, flags, i, nullptr);
 }
 
 /*
@@ -1293,19 +1272,19 @@
  * If "*last_package" is nullptr or does not match the current class' package,
  * the value will be replaced with a newly-allocated string.
  */
-static void DumpClass(dex_ir::Header* header, int idx, char** last_package) {
-  dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(idx);
+void DexLayout::DumpClass(int idx, char** last_package) {
+  dex_ir::ClassDef* class_def = header_->GetCollections().GetClassDef(idx);
   // Omitting non-public class.
   if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
     return;
   }
 
   if (options_.show_section_headers_) {
-    DumpClassDef(header, idx);
+    DumpClassDef(idx);
   }
 
   if (options_.show_annotations_) {
-    DumpClassAnnotations(header, idx);
+    DumpClassAnnotations(idx);
   }
 
   // For the XML output, show the package name.  Ideally we'd gather
@@ -1313,7 +1292,7 @@
   // package name wouldn't jump around, but that's not a great plan
   // for something that needs to run on the device.
   const char* class_descriptor =
-      header->GetCollections().GetClassDef(idx)->ClassType()->GetStringId()->Data();
+      header_->GetCollections().GetClassDef(idx)->ClassType()->GetStringId()->Data();
   if (!(class_descriptor[0] == 'L' &&
         class_descriptor[strlen(class_descriptor)-1] == ';')) {
     // Arrays and primitives should not be defined explicitly. Keep going?
@@ -1406,8 +1385,7 @@
     dex_ir::FieldItemVector* static_fields = class_data->StaticFields();
     if (static_fields != nullptr) {
       for (uint32_t i = 0; i < static_fields->size(); i++) {
-        DumpSField(header,
-                   (*static_fields)[i]->GetFieldId()->GetIndex(),
+        DumpSField((*static_fields)[i]->GetFieldId()->GetIndex(),
                    (*static_fields)[i]->GetAccessFlags(),
                    i,
                    i < encoded_values_size ? (*encoded_values)[i].get() : nullptr);
@@ -1423,8 +1401,7 @@
     dex_ir::FieldItemVector* instance_fields = class_data->InstanceFields();
     if (instance_fields != nullptr) {
       for (uint32_t i = 0; i < instance_fields->size(); i++) {
-        DumpIField(header,
-                   (*instance_fields)[i]->GetFieldId()->GetIndex(),
+        DumpIField((*instance_fields)[i]->GetFieldId()->GetIndex(),
                    (*instance_fields)[i]->GetAccessFlags(),
                    i);
       }  // for
@@ -1439,8 +1416,7 @@
     dex_ir::MethodItemVector* direct_methods = class_data->DirectMethods();
     if (direct_methods != nullptr) {
       for (uint32_t i = 0; i < direct_methods->size(); i++) {
-        DumpMethod(header,
-                   (*direct_methods)[i]->GetMethodId()->GetIndex(),
+        DumpMethod((*direct_methods)[i]->GetMethodId()->GetIndex(),
                    (*direct_methods)[i]->GetAccessFlags(),
                    (*direct_methods)[i]->GetCodeItem(),
                  i);
@@ -1456,8 +1432,7 @@
     dex_ir::MethodItemVector* virtual_methods = class_data->VirtualMethods();
     if (virtual_methods != nullptr) {
       for (uint32_t i = 0; i < virtual_methods->size(); i++) {
-        DumpMethod(header,
-                   (*virtual_methods)[i]->GetMethodId()->GetIndex(),
+        DumpMethod((*virtual_methods)[i]->GetMethodId()->GetIndex(),
                    (*virtual_methods)[i]->GetAccessFlags(),
                    (*virtual_methods)[i]->GetCodeItem(),
                    i);
@@ -1481,24 +1456,10 @@
   free(access_str);
 }
 
-/*
- * Dumps the requested sections of the file.
- */
-static void ProcessDexFile(const char* file_name, const DexFile* dex_file, size_t dex_file_index) {
-  if (options_.verbose_) {
-    fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
-            file_name, dex_file->GetHeader().magic_ + 4);
-  }
-  std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file));
-
-  if (options_.visualize_pattern_) {
-    VisualizeDexLayout(header.get(), dex_file, dex_file_index);
-    return;
-  }
-
+void DexLayout::DumpDexFile() {
   // Headers.
   if (options_.show_file_headers_) {
-    DumpFileHeader(header.get());
+    DumpFileHeader();
   }
 
   // Open XML context.
@@ -1508,9 +1469,9 @@
 
   // Iterate over all classes.
   char* package = nullptr;
-  const uint32_t class_defs_size = header->GetCollections().ClassDefsSize();
+  const uint32_t class_defs_size = header_->GetCollections().ClassDefsSize();
   for (uint32_t i = 0; i < class_defs_size; i++) {
-    DumpClass(header.get(), i, &package);
+    DumpClass(i, &package);
   }  // for
 
   // Free the last package allocated.
@@ -1523,20 +1484,227 @@
   if (options_.output_format_ == kOutputXml) {
     fprintf(out_file_, "</api>\n");
   }
+}
 
-  // Output dex file.
-  if (options_.output_dex_directory_ != nullptr) {
+// Computes a new class-def order in which classes contained in the profile
+// come first (original relative order kept within each group), then assigns
+// each class def its new index and offset and packs the class data items in
+// that same order. Returns the new order.
+std::vector<dex_ir::ClassDef*> DexLayout::LayoutClassDefsAndClassData(const DexFile* dex_file) {
+  dex_ir::Collections& collections = header_->GetCollections();
+  std::vector<dex_ir::ClassDef*> ordered;
+
+  // First pass collects profiled classes, second pass collects the rest.
+  const bool passes[] = { true, false };
+  for (bool want_profiled : passes) {
+    for (std::unique_ptr<dex_ir::ClassDef>& class_def : collections.ClassDefs()) {
+      dex::TypeIndex type_idx(class_def->ClassType()->GetIndex());
+      const bool in_profile = info_->ContainsClass(*dex_file, type_idx);
+      if (in_profile == want_profiled) {
+        ordered.push_back(class_def.get());
+      }
+    }
+  }
+
+  // Hand out consecutive indexes/offsets following the new order.
+  uint32_t next_def_offset = collections.ClassDefsOffset();
+  uint32_t next_data_offset = collections.ClassDatasOffset();
+  uint32_t next_index = 0;
+  for (dex_ir::ClassDef* class_def : ordered) {
+    class_def->SetIndex(next_index++);
+    class_def->SetOffset(next_def_offset);
+    next_def_offset += dex_ir::ClassDef::ItemSize();
+    dex_ir::ClassData* class_data = class_def->GetClassData();
+    if (class_data != nullptr) {
+      class_data->SetOffset(next_data_offset);
+      next_data_offset += class_data->GetSize();
+    }
+  }
+  return ordered;
+}
+
+// Re-packs code items following the given class order (direct methods before
+// virtual methods within each class), starting at the code item section
+// offset. Each class data item is shifted by the ULEB128 size change
+// accumulated so far from the code-item offsets rewritten before it.
+// Returns the total accumulated size change in bytes.
+int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassDef*> new_class_def_order) {
+  int32_t diff = 0;
+  uint32_t offset = header_->GetCollections().CodeItemsOffset();
+
+  // Moves one code item to the running offset and records how much the
+  // ULEB128 encoding of its offset grew or shrank.
+  auto place_code_item = [&diff, &offset](dex_ir::CodeItem* code_item) {
+    if (code_item == nullptr) {
+      return;
+    }
+    diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
+    code_item->SetOffset(offset);
+    offset += RoundUp(code_item->GetSize(), 4);
+  };
+
+  for (dex_ir::ClassDef* class_def : new_class_def_order) {
+    dex_ir::ClassData* class_data = class_def->GetClassData();
+    if (class_data == nullptr) {
+      continue;
+    }
+    class_data->SetOffset(class_data->GetOffset() + diff);
+    for (auto& method : *class_data->DirectMethods()) {
+      place_code_item(method->GetCodeItem());
+    }
+    for (auto& method : *class_data->VirtualMethods()) {
+      place_code_item(method->GetCodeItem());
+    }
+  }
+
+  return diff;
+}
+
+// Shifts the recorded offset of every item stored in |map| by |diff| bytes.
+// |diff| is unsigned, so the addition is performed modulo 2^32.
+template<class T> void DexLayout::FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map,
+                                               uint32_t diff) {
+  for (auto it = map.begin(); it != map.end(); ++it) {
+    T* item = it->second.get();
+    item->SetOffset(item->GetOffset() + diff);
+  }
+}
+
+// Adjust offsets of all sections with an address after the specified offset by diff bytes.
+//
+// For each section handled below, the section's start offset is compared against |offset|.
+// The comparison is strict, so a section that starts exactly at |offset| is left where it is.
+// When a section does move, both its start offset and (except for the map list, which has no
+// per-item container here) the offset of every item it holds are shifted by |diff|.
+// |diff| is unsigned; all additions are performed modulo 2^32.
+void DexLayout::FixupSections(uint32_t offset, uint32_t diff) {
+  dex_ir::Collections& collections = header_->GetCollections();
+  // Map list: only the section offset is adjusted.
+  uint32_t map_list_offset = collections.MapListOffset();
+  if (map_list_offset > offset) {
+    collections.SetMapListOffset(map_list_offset + diff);
+  }
+
+  // Type lists.
+  uint32_t type_lists_offset = collections.TypeListsOffset();
+  if (type_lists_offset > offset) {
+    collections.SetTypeListsOffset(type_lists_offset + diff);
+    FixupSection(collections.TypeLists(), diff);
+  }
+
+  // Annotation set ref lists.
+  uint32_t annotation_set_ref_lists_offset = collections.AnnotationSetRefListsOffset();
+  if (annotation_set_ref_lists_offset > offset) {
+    collections.SetAnnotationSetRefListsOffset(annotation_set_ref_lists_offset + diff);
+    FixupSection(collections.AnnotationSetRefLists(), diff);
+  }
+
+  // Annotation set items.
+  uint32_t annotation_set_items_offset = collections.AnnotationSetItemsOffset();
+  if (annotation_set_items_offset > offset) {
+    collections.SetAnnotationSetItemsOffset(annotation_set_items_offset + diff);
+    FixupSection(collections.AnnotationSetItems(), diff);
+  }
+
+  // Class data items.
+  uint32_t class_datas_offset = collections.ClassDatasOffset();
+  if (class_datas_offset > offset) {
+    collections.SetClassDatasOffset(class_datas_offset + diff);
+    FixupSection(collections.ClassDatas(), diff);
+  }
+
+  // Code items.
+  uint32_t code_items_offset = collections.CodeItemsOffset();
+  if (code_items_offset > offset) {
+    collections.SetCodeItemsOffset(code_items_offset + diff);
+    FixupSection(collections.CodeItems(), diff);
+  }
+
+  // String data items.
+  uint32_t string_datas_offset = collections.StringDatasOffset();
+  if (string_datas_offset > offset) {
+    collections.SetStringDatasOffset(string_datas_offset + diff);
+    FixupSection(collections.StringDatas(), diff);
+  }
+
+  // Debug info items.
+  uint32_t debug_info_items_offset = collections.DebugInfoItemsOffset();
+  if (debug_info_items_offset > offset) {
+    collections.SetDebugInfoItemsOffset(debug_info_items_offset + diff);
+    FixupSection(collections.DebugInfoItems(), diff);
+  }
+
+  // Annotation items.
+  uint32_t annotation_items_offset = collections.AnnotationItemsOffset();
+  if (annotation_items_offset > offset) {
+    collections.SetAnnotationItemsOffset(annotation_items_offset + diff);
+    FixupSection(collections.AnnotationItems(), diff);
+  }
+
+  // Encoded array items.
+  uint32_t encoded_array_items_offset = collections.EncodedArrayItemsOffset();
+  if (encoded_array_items_offset > offset) {
+    collections.SetEncodedArrayItemsOffset(encoded_array_items_offset + diff);
+    FixupSection(collections.EncodedArrayItems(), diff);
+  }
+
+  // Annotations directory items.
+  uint32_t annotations_directory_items_offset = collections.AnnotationsDirectoryItemsOffset();
+  if (annotations_directory_items_offset > offset) {
+    collections.SetAnnotationsDirectoryItemsOffset(annotations_directory_items_offset + diff);
+    FixupSection(collections.AnnotationsDirectoryItems(), diff);
+  }
+}
+
+// Applies the profile-guided layout: reorders class defs and class data,
+// re-packs code items, then shifts the sections located after class data and
+// updates the file size by the resulting (4-byte aligned) size change.
+void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
+  // Size change reported by the code item layout, before alignment.
+  const int32_t raw_delta = LayoutCodeItems(LayoutClassDefsAndClassData(dex_file));
+  // Later sections must stay 4-byte aligned, so round the delta up.
+  const int32_t aligned_delta = RoundUp(raw_delta, 4);
+  // Only sections starting after the class data section are moved.
+  FixupSections(header_->GetCollections().ClassDatasOffset(), aligned_delta);
+  header_->SetFileSize(header_->FileSize() + aligned_delta);
+}
+
+/*
+ * Writes the current IR (header_) out as a dex image of header_->FileSize()
+ * bytes, either into a newly created file mapped into memory or, when
+ * options_.output_to_memmap_ is set, into an anonymous mapping. The mapping
+ * is kept in mem_map_.
+ *
+ * File destination, derived from options_.output_dex_directory_:
+ *  - same directory as the input: "<input path>.new";
+ *  - input path contains a '/': output directory + "/<input file name>";
+ *  - otherwise: output directory + "/<input path>.new".
+ *
+ * On any failure an error is logged, a partially created file is erased, and
+ * nothing is written.
+ */
+void DexLayout::OutputDexFile(const std::string& dex_file_location) {
+  std::string error_msg;
+  std::unique_ptr<File> new_file;
+  if (!options_.output_to_memmap_) {
     std::string output_location(options_.output_dex_directory_);
-    size_t last_slash = dex_file->GetLocation().rfind("/");
-    output_location.append(dex_file->GetLocation().substr(last_slash));
-    DexWriter::OutputDexFile(*header, output_location.c_str());
+    size_t last_slash = dex_file_location.rfind("/");
+    // With no '/' present, last_slash is npos and npos + 1 wraps to 0, giving an empty directory.
+    std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
+    if (output_location == dex_file_directory) {
+      output_location = dex_file_location + ".new";
+    } else if (last_slash != std::string::npos) {
+      output_location += dex_file_location.substr(last_slash);
+    } else {
+      output_location += "/" + dex_file_location + ".new";
+    }
+    new_file.reset(OS::CreateEmptyFile(output_location.c_str()));
+    // Creation can fail (missing directory, permissions); do not dereference a null file.
+    if (new_file == nullptr) {
+      LOG(ERROR) << "Could not create dex writer output file: " << output_location;
+      return;
+    }
+    // The file must be grown to its final size before it can be mapped and written through.
+    if (ftruncate(new_file->Fd(), header_->FileSize()) != 0) {
+      LOG(ERROR) << "Could not resize dex writer output file: " << output_location;
+      new_file->Erase();
+      return;
+    }
+    mem_map_.reset(MemMap::MapFile(header_->FileSize(), PROT_READ | PROT_WRITE, MAP_SHARED,
+        new_file->Fd(), 0, /*low_4gb*/ false, output_location.c_str(), &error_msg));
+  } else {
+    mem_map_.reset(MemMap::MapAnonymous("layout dex", nullptr, header_->FileSize(),
+        PROT_READ | PROT_WRITE, /* low_4gb */ false, /* reuse */ false, &error_msg));
+  }
+  if (mem_map_ == nullptr) {
+    LOG(ERROR) << "Could not create mem map for dex writer output: " << error_msg;
+    if (new_file != nullptr) {
+      new_file->Erase();
+    }
+    return;
+  }
+  DexWriter::Output(header_, mem_map_.get());
+  if (new_file != nullptr) {
+    UNUSED(new_file->FlushCloseOrErase());
+  }
+}
+
+/*
+ * Processes one dex file: builds its IR, then visualizes it, dumps it and/or
+ * writes a (possibly re-laid-out) copy, depending on the options.
+ */
+void DexLayout::ProcessDexFile(const char* file_name,
+                               const DexFile* dex_file,
+                               size_t dex_file_index) {
+  // The IR is owned by this local; header_ only borrows it, so the pointer
+  // stored in header_ is valid for the duration of this call only.
+  std::unique_ptr<dex_ir::Header> ir_owner(dex_ir::DexIrBuilder(*dex_file));
+  header_ = ir_owner.get();
+
+  if (options_.verbose_) {
+    fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n",
+            file_name, dex_file->GetHeader().magic_ + 4);
+  }
+
+  // Visualization replaces both dumping and output.
+  if (options_.visualize_pattern_) {
+    VisualizeDexLayout(header_, dex_file, dex_file_index, info_);
+    return;
+  }
+
+  // Textual dump.
+  if (options_.dump_) {
+    DumpDexFile();
+  }
+
+  // Write the dex file to a file or a memmap; re-layout only with a profile.
+  const bool write_output =
+      options_.output_dex_directory_ != nullptr || options_.output_to_memmap_;
+  if (write_output) {
+    if (info_ != nullptr) {
+      LayoutOutputFile(dex_file);
+    }
+    OutputDexFile(dex_file->GetLocation());
   }
 }
 
 /*
  * Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
  */
-int ProcessFile(const char* file_name) {
+int DexLayout::ProcessFile(const char* file_name) {
   if (options_.verbose_) {
     fprintf(out_file_, "Processing '%s'...\n", file_name);
   }
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index a5bd992..ac1a4a6 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -26,8 +26,13 @@
 #include <stdint.h>
 #include <stdio.h>
 
+#include "dex_ir.h"
+#include "mem_map.h"
+
 namespace art {
 
+class DexFile;
+class Instruction;
 class ProfileCompilationInfo;
 
 /* Supported output formats. */
@@ -37,28 +42,90 @@
 };
 
 /* Command-line options. */
-struct Options {
-  bool build_dex_ir_;
-  bool checksum_only_;
-  bool disassemble_;
-  bool exports_only_;
-  bool ignore_bad_checksum_;
-  bool show_annotations_;
-  bool show_file_headers_;
-  bool show_section_headers_;
-  bool verbose_;
-  bool visualize_pattern_;
-  OutputFormat output_format_;
-  const char* output_dex_directory_;
-  const char* output_file_name_;
-  const char* profile_file_name_;
+// Settings that drive a DexLayout run. Every field has an in-class default,
+// so a default-constructed Options has all flags off, plain output format and
+// no file names.
+class Options {
+ public:
+  Options() = default;
+
+  bool dump_ = false;                  // Produce the textual dump of the dex file.
+  bool build_dex_ir_ = false;          // -b: build dex_ir.
+  bool checksum_only_ = false;         // -c: verify the checksum then exit.
+  bool disassemble_ = false;           // -d: disassemble Dalvik instructions.
+  bool exports_only_ = false;          // -e: exported items only.
+  bool ignore_bad_checksum_ = false;   // -i: continue even if checksum is bad.
+  bool output_to_memmap_ = false;      // -m: output dex files to a memmap.
+  bool show_annotations_ = false;      // -a: display annotations.
+  bool show_file_headers_ = false;     // -f: display outer file header.
+  bool show_section_headers_ = false;  // -h: display section headers, i.e. all meta-data.
+  bool verbose_ = false;               // Print progress lines such as "Processing '...'".
+  bool visualize_pattern_ = false;     // -s: visualize access pattern.
+  OutputFormat output_format_ = kOutputPlain;  // -l: plain or xml.
+  const char* output_dex_directory_ = nullptr;  // -w: output dex files directory.
+  const char* output_file_name_ = nullptr;      // -o: output file.
+  const char* profile_file_name_ = nullptr;     // -p: profile file.
 };
 
-/* Prototypes. */
-extern struct Options options_;
-extern FILE* out_file_;
-extern ProfileCompilationInfo* profile_info_;
-int ProcessFile(const char* file_name);
+// Dumps dex files and/or writes re-laid-out copies of them, as selected by an
+// Options instance. The options, profile info, output stream and header are
+// all borrowed: this class stores them as a reference / raw pointers and does
+// not free them.
+class DexLayout {
+ public:
+  // |options| is held by reference and must outlive this object.
+  // |info| may be null, in which case no profile-guided re-layout is done
+  // before writing output. |out_file| receives the textual dump.
+  DexLayout(Options& options,
+            ProfileCompilationInfo* info,
+            FILE* out_file,
+            dex_ir::Header*
+            header = nullptr)
+      : options_(options), info_(info), out_file_(out_file), header_(header) { }
+
+  // Processes a single file (either direct .dex or indirect .zip/.jar/.apk).
+  int ProcessFile(const char* file_name);
+  // Builds the IR for |dex_file|, then visualizes, dumps and/or outputs it
+  // according to the options.
+  void ProcessDexFile(const char* file_name, const DexFile* dex_file, size_t dex_file_index);
+
+  // Non-owning access to the IR header currently being worked on.
+  dex_ir::Header* GetHeader() const { return header_; }
+  void SetHeader(dex_ir::Header* header) { header_ = header; }
+
+  // Hands ownership of the output mapping to the caller; afterwards this
+  // object no longer holds it.
+  MemMap* GetAndReleaseMemMap() { return mem_map_.release(); }
+
+ private:
+  // Textual dump helpers; all of them write to out_file_.
+  void DumpAnnotationSetItem(dex_ir::AnnotationSetItem* set_item);
+  void DumpBytecodes(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset);
+  void DumpCatches(const dex_ir::CodeItem* code);
+  void DumpClass(int idx, char** last_package);
+  void DumpClassAnnotations(int idx);
+  void DumpClassDef(int idx);
+  void DumpCode(uint32_t idx, const dex_ir::CodeItem* code, uint32_t code_offset);
+  void DumpEncodedAnnotation(dex_ir::EncodedAnnotation* annotation);
+  void DumpEncodedValue(const dex_ir::EncodedValue* data);
+  void DumpFileHeader();
+  void DumpIField(uint32_t idx, uint32_t flags, int i);
+  void DumpInstruction(const dex_ir::CodeItem* code,
+                       uint32_t code_offset,
+                       uint32_t insn_idx,
+                       uint32_t insn_width,
+                       const Instruction* dec_insn);
+  void DumpInterface(const dex_ir::TypeId* type_item, int i);
+  void DumpLocalInfo(const dex_ir::CodeItem* code);
+  void DumpMethod(uint32_t idx, uint32_t flags, const dex_ir::CodeItem* code, int i);
+  void DumpPositionInfo(const dex_ir::CodeItem* code);
+  void DumpSField(uint32_t idx, uint32_t flags, int i, dex_ir::EncodedValue* init);
+  void DumpDexFile();
+
+  // Layout helpers used by LayoutOutputFile().
+  std::vector<dex_ir::ClassDef*> LayoutClassDefsAndClassData(const DexFile* dex_file);
+  int32_t LayoutCodeItems(std::vector<dex_ir::ClassDef*> new_class_def_order);
+  template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
+  void FixupSections(uint32_t offset, uint32_t diff);
+
+  // Creates a new layout for the dex file based on profile info.
+  // Currently reorders ClassDefs, ClassDataItems, and CodeItems.
+  void LayoutOutputFile(const DexFile* dex_file);
+  // Writes the IR to a new file or to an anonymous mapping; the mapping is
+  // stored in mem_map_.
+  void OutputDexFile(const std::string& dex_file_location);
+
+  void DumpCFG(const DexFile* dex_file, int idx);
+  void DumpCFG(const DexFile* dex_file, uint32_t dex_method_idx, const DexFile::CodeItem* code);
+
+  Options& options_;                  // Borrowed; see constructor.
+  ProfileCompilationInfo* info_;      // Borrowed; may be null.
+  FILE* out_file_;                    // Borrowed; destination of the dump.
+  dex_ir::Header* header_;            // Borrowed; IR currently being processed.
+  std::unique_ptr<MemMap> mem_map_;   // Owned until GetAndReleaseMemMap().
+
+  DISALLOW_COPY_AND_ASSIGN(DexLayout);
+};
 
 }  // namespace art
 
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index 825dd50..5f8a118 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -68,64 +68,67 @@
   InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
 
-  // Reset options.
+  Options options;
+  options.dump_ = true;
+  options.verbose_ = true;
   bool want_usage = false;
-  memset(&options_, 0, sizeof(options_));
-  options_.verbose_ = true;
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "abcdefghil:o:p:sw:");
+    const int ic = getopt(argc, argv, "abcdefghil:mo:p:sw:");
     if (ic < 0) {
       break;  // done
     }
     switch (ic) {
       case 'a':  // display annotations
-        options_.show_annotations_ = true;
+        options.show_annotations_ = true;
         break;
       case 'b':  // build dex_ir
-        options_.build_dex_ir_ = true;
+        options.build_dex_ir_ = true;
         break;
       case 'c':  // verify the checksum then exit
-        options_.checksum_only_ = true;
+        options.checksum_only_ = true;
         break;
       case 'd':  // disassemble Dalvik instructions
-        options_.disassemble_ = true;
+        options.disassemble_ = true;
         break;
       case 'e':  // exported items only
-        options_.exports_only_ = true;
+        options.exports_only_ = true;
         break;
       case 'f':  // display outer file header
-        options_.show_file_headers_ = true;
+        options.show_file_headers_ = true;
         break;
       case 'h':  // display section headers, i.e. all meta-data
-        options_.show_section_headers_ = true;
+        options.show_section_headers_ = true;
         break;
       case 'i':  // continue even if checksum is bad
-        options_.ignore_bad_checksum_ = true;
+        options.ignore_bad_checksum_ = true;
         break;
       case 'l':  // layout
         if (strcmp(optarg, "plain") == 0) {
-          options_.output_format_ = kOutputPlain;
+          options.output_format_ = kOutputPlain;
         } else if (strcmp(optarg, "xml") == 0) {
-          options_.output_format_ = kOutputXml;
-          options_.verbose_ = false;
+          options.output_format_ = kOutputXml;
+          options.verbose_ = false;
         } else {
           want_usage = true;
         }
         break;
+      case 'm':  // output dex files to a memmap
+        options.output_to_memmap_ = true;
+        break;
       case 'o':  // output file
-        options_.output_file_name_ = optarg;
+        options.output_file_name_ = optarg;
         break;
       case 'p':  // profile file
-        options_.profile_file_name_ = optarg;
+        options.profile_file_name_ = optarg;
         break;
       case 's':  // visualize access pattern
-        options_.visualize_pattern_ = true;
-        options_.verbose_ = false;
+        options.visualize_pattern_ = true;
+        options.verbose_ = false;
         break;
       case 'w':  // output dex files directory
-        options_.output_dex_directory_ = optarg;
+        options.output_dex_directory_ = optarg;
         break;
       default:
         want_usage = true;
@@ -138,7 +141,7 @@
     fprintf(stderr, "%s: no file specified\n", kProgramName);
     want_usage = true;
   }
-  if (options_.checksum_only_ && options_.ignore_bad_checksum_) {
+  if (options.checksum_only_ && options.ignore_bad_checksum_) {
     fprintf(stderr, "Can't specify both -c and -i\n");
     want_usage = true;
   }
@@ -148,32 +151,37 @@
   }
 
   // Open alternative output file.
-  if (options_.output_file_name_) {
-    out_file_ = fopen(options_.output_file_name_, "w");
-    if (!out_file_) {
-      fprintf(stderr, "Can't open %s\n", options_.output_file_name_);
+  FILE* out_file = stdout;
+  if (options.output_file_name_) {
+    out_file = fopen(options.output_file_name_, "w");
+    if (!out_file) {
+      fprintf(stderr, "Can't open %s\n", options.output_file_name_);
       return 1;
     }
   }
 
   // Open profile file.
-  if (options_.profile_file_name_) {
-    int profile_fd = open(options_.profile_file_name_, O_RDONLY);
+  ProfileCompilationInfo* profile_info = nullptr;
+  if (options.profile_file_name_) {
+    int profile_fd = open(options.profile_file_name_, O_RDONLY);
     if (profile_fd < 0) {
-      fprintf(stderr, "Can't open %s\n", options_.profile_file_name_);
+      fprintf(stderr, "Can't open %s\n", options.profile_file_name_);
       return 1;
     }
-    profile_info_ = new ProfileCompilationInfo();
-    if (!profile_info_->Load(profile_fd)) {
-      fprintf(stderr, "Can't read profile info from %s\n", options_.profile_file_name_);
+    profile_info = new ProfileCompilationInfo();
+    if (!profile_info->Load(profile_fd)) {
+      fprintf(stderr, "Can't read profile info from %s\n", options.profile_file_name_);
       return 1;
     }
   }
 
+  // Create DexLayout instance.
+  DexLayout dex_layout(options, profile_info, out_file);
+
   // Process all files supplied on command line.
   int result = 0;
   while (optind < argc) {
-    result |= ProcessFile(argv[optind++]);
+    result |= dex_layout.ProcessFile(argv[optind++]);
   }  // while
   return result != 0;
 }
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 89544d7..665baa6 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -22,11 +22,57 @@
 #include <unistd.h>
 
 #include "base/stringprintf.h"
+#include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
 #include "utils.h"
 
 namespace art {
 
+static const char kDexFileLayoutInputDex[] =
+    "ZGV4CjAzNQD1KW3+B8NAB0f2A/ZVIBJ0aHrGIqcpVTAUAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAH"
+    "AAAAcAAAAAQAAACMAAAAAQAAAJwAAAAAAAAAAAAAAAMAAACoAAAAAgAAAMAAAAAUAQAAAAEAADAB"
+    "AAA4AQAAQAEAAEgBAABNAQAAUgEAAGYBAAADAAAABAAAAAUAAAAGAAAABgAAAAMAAAAAAAAAAAAA"
+    "AAAAAAABAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAEAAAAAAAAAdQEAAAAAAAABAAAA"
+    "AAAAAAIAAAAAAAAAAgAAAAAAAAB/AQAAAAAAAAEAAQABAAAAaQEAAAQAAABwEAIAAAAOAAEAAQAB"
+    "AAAAbwEAAAQAAABwEAIAAAAOAAY8aW5pdD4ABkEuamF2YQAGQi5qYXZhAANMQTsAA0xCOwASTGph"
+    "dmEvbGFuZy9PYmplY3Q7AAFWAAQABw48AAQABw48AAAAAQAAgIAEgAIAAAEAAYCABJgCAAAACwAA"
+    "AAAAAAABAAAAAAAAAAEAAAAHAAAAcAAAAAIAAAAEAAAAjAAAAAMAAAABAAAAnAAAAAUAAAADAAAA"
+    "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
+    "AAAAdQEAAAAQAAABAAAAjAEAAA==";
+
+static const char kDexFileLayoutInputProfile[] =
+    "cHJvADAwMgABAAsAAAABAPUpbf5jbGFzc2VzLmRleAEA";
+
+static const char kDexFileLayoutExpectedOutputDex[] =
+    "ZGV4CjAzNQD1KW3+B8NAB0f2A/ZVIBJ0aHrGIqcpVTAUAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAH"
+    "AAAAcAAAAAQAAACMAAAAAQAAAJwAAAAAAAAAAAAAAAMAAACoAAAAAgAAAMAAAAAUAQAAAAEAADAB"
+    "AAA4AQAAQAEAAEgBAABNAQAAUgEAAGYBAAADAAAABAAAAAUAAAAGAAAABgAAAAMAAAAAAAAAAAAA"
+    "AAAAAAABAAAAAAAAAAIAAAAAAAAAAQAAAAAAAAACAAAAAAAAAAIAAAAAAAAAdQEAAAAAAAAAAAAA"
+    "AAAAAAIAAAAAAAAAAQAAAAAAAAB/AQAAAAAAAAEAAQABAAAAbwEAAAQAAABwEAIAAAAOAAEAAQAB"
+    "AAAAaQEAAAQAAABwEAIAAAAOAAY8aW5pdD4ABkEuamF2YQAGQi5qYXZhAANMQTsAA0xCOwASTGph"
+    "dmEvbGFuZy9PYmplY3Q7AAFWAAQABw48AAQABw48AAAAAQABgIAEgAIAAAEAAICABJgCAAAACwAA"
+    "AAAAAAABAAAAAAAAAAEAAAAHAAAAcAAAAAIAAAAEAAAAjAAAAAMAAAABAAAAnAAAAAUAAAADAAAA"
+    "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
+    "AAAAdQEAAAAQAAABAAAAjAEAAA==";
+
+static void WriteFileBase64(const char* base64, const char* location) {
+  // Decode the base64 text into raw bytes; length receives the decoded byte count.
+  CHECK(base64 != nullptr);
+  size_t length;
+  std::unique_ptr<uint8_t[]> bytes(DecodeBase64(base64, &length));
+  CHECK(bytes.get() != nullptr);
+
+  // Write the decoded bytes to the file created at location; any failure here is fatal.
+  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
+  CHECK(file.get() != nullptr);
+  if (!file->WriteFully(bytes.get(), length)) {
+    PLOG(FATAL) << "Failed to write base64 as file";
+  }
+  if (file->FlushCloseOrErase() != 0) {
+    PLOG(FATAL) << "Could not flush and close test file.";
+  }
+}
+
 class DexLayoutTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
@@ -37,12 +83,12 @@
   bool FullPlainOutputExec(std::string* error_msg) {
     // TODO: dexdump2 -> dexdump ?
     ScratchFile dexdump_output;
-    std::string dexdump_filename = dexdump_output.GetFilename();
+    const std::string& dexdump_filename = dexdump_output.GetFilename();
     std::string dexdump = GetTestAndroidRoot() + "/bin/dexdump2";
     EXPECT_TRUE(OS::FileExists(dexdump.c_str())) << dexdump << " should be a valid file path";
 
     ScratchFile dexlayout_output;
-    std::string dexlayout_filename = dexlayout_output.GetFilename();
+    const std::string& dexlayout_filename = dexlayout_output.GetFilename();
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
@@ -51,7 +97,6 @@
           { dexdump, "-d", "-f", "-h", "-l", "plain", "-o", dexdump_filename, dex_file };
       std::vector<std::string> dexlayout_exec_argv =
           { dexlayout, "-d", "-f", "-h", "-l", "plain", "-o", dexlayout_filename, dex_file };
-
       if (!::art::Exec(dexdump_exec_argv, error_msg)) {
         return false;
       }
@@ -70,20 +115,18 @@
   // Runs DexFileOutput test.
   bool DexFileOutputExec(std::string* error_msg) {
     ScratchFile tmp_file;
-    std::string tmp_name = tmp_file.GetFilename();
-    size_t tmp_last_slash = tmp_name.rfind("/");
+    const std::string& tmp_name = tmp_file.GetFilename();
+    size_t tmp_last_slash = tmp_name.rfind('/');
     std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
     for (const std::string &dex_file : GetLibCoreDexFileNames()) {
       std::vector<std::string> dexlayout_exec_argv =
-          { dexlayout, "-d", "-f", "-h", "-l", "plain", "-w", tmp_dir, "-o", tmp_name, dex_file };
-
+          { dexlayout, "-w", tmp_dir, "-o", tmp_name, dex_file };
       if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
         return false;
       }
-
       size_t dex_file_last_slash = dex_file.rfind("/");
       std::string dex_file_name = dex_file.substr(dex_file_last_slash + 1);
       std::vector<std::string> unzip_exec_argv =
@@ -105,7 +148,44 @@
         return false;
       }
     }
+    return true;
+  }
 
+  // Runs the DexFileLayout test: lay out a dex file with a profile, diff against expected output.
+  bool DexFileLayoutExec(std::string* error_msg) {
+    ScratchFile tmp_file;
+    const std::string& tmp_name = tmp_file.GetFilename();
+    size_t tmp_last_slash = tmp_name.rfind('/');
+    std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
+
+    // Write inputs and expected outputs next to the scratch file.
+    std::string dex_file = tmp_dir + "classes.dex";
+    WriteFileBase64(kDexFileLayoutInputDex, dex_file.c_str());
+    std::string profile_file = tmp_dir + "primary.prof";
+    WriteFileBase64(kDexFileLayoutInputProfile, profile_file.c_str());
+    std::string expected_output = tmp_dir + "expected.dex";
+    WriteFileBase64(kDexFileLayoutExpectedOutputDex, expected_output.c_str());
+    std::string output_dex = tmp_dir + "classes.dex.new";
+
+    std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+    EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+
+    std::vector<std::string> dexlayout_exec_argv =
+        { dexlayout, "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
+    if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
+      return false;
+    }
+    std::vector<std::string> diff_exec_argv =
+        { "/usr/bin/diff", expected_output, output_dex };
+    if (!::art::Exec(diff_exec_argv, error_msg)) {
+      return false;
+    }
+    // Clean up everything this test wrote (only reached when the diff succeeded).
+    std::vector<std::string> rm_exec_argv =
+        { "/bin/rm", dex_file, profile_file, expected_output, output_dex };
+    if (!::art::Exec(rm_exec_argv, error_msg)) {
+      return false;
+    }
     return true;
   }
 };
@@ -125,4 +205,11 @@
   ASSERT_TRUE(DexFileOutputExec(&error_msg)) << error_msg;
 }
 
+TEST_F(DexLayoutTest, DexFileLayout) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(DexFileLayoutExec(&error_msg)) << error_msg;
+}
+
 }  // namespace art
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 68473c4..efe1aad 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -140,7 +140,7 @@
   const DexFile::ClassDef& pClassDef = pDexFile->GetClassDef(idx);
 
   const char* fileName;
-  if (pClassDef.source_file_idx_ == DexFile::kDexNoIndex) {
+  if (!pClassDef.source_file_idx_.IsValid()) {
     fileName = nullptr;
   } else {
     fileName = pDexFile->StringDataByIdx(pClassDef.source_file_idx_);
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 30b708c..3347dac 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -63,9 +63,7 @@
         case kVld2Location:
         case kVld3Location:
         case kVld4Location: {
-          const uintptr_t pc_delta = disasm_->IsT32()
-              ? vixl::aarch32::kT32PcDelta
-              : vixl::aarch32::kA32PcDelta;
+          const uintptr_t pc_delta = label.GetLabel()->GetPcOffset();
           const int32_t offset = label.GetLabel()->GetLocation();
 
           os() << "[pc, #" << offset - pc_delta << "]";
@@ -77,7 +75,7 @@
       }
     }
 
-    DisassemblerStream& operator<<(const vixl::aarch32::Register reg) OVERRIDE {
+    DisassemblerStream& operator<<(vixl::aarch32::Register reg) OVERRIDE {
       if (reg.Is(tr)) {
         os() << "tr";
         return *this;
@@ -118,20 +116,11 @@
   CustomDisassembler(std::ostream& os, const DisassemblerOptions* options)
       : PrintDisassembler(&disassembler_stream_), disassembler_stream_(os, this, options) {}
 
-  void PrintPc(uint32_t prog_ctr) OVERRIDE {
+  void PrintCodeAddress(uint32_t prog_ctr) OVERRIDE {
     os() << "0x" << std::hex << std::setw(8) << std::setfill('0') << prog_ctr << ": ";
   }
 
-  bool IsT32() const {
-    return is_t32_;
-  }
-
-  void SetT32(bool is_t32) {
-    is_t32_ = is_t32;
-  }
-
  private:
-  bool is_t32_;
   CustomDisassemblerStream disassembler_stream_;
 };
 
@@ -152,7 +141,7 @@
       sizeof(unaligned_float), sizeof(unaligned_double)};
   const uintptr_t begin = reinterpret_cast<uintptr_t>(options_->base_address_);
   const uintptr_t end = reinterpret_cast<uintptr_t>(options_->end_address_);
-  uintptr_t literal_addr = RoundDown(disasm_->GetPc(), vixl::aarch32::kRegSizeInBytes) + offset;
+  uintptr_t literal_addr = RoundDown(disasm_->GetCodeAddress(), vixl::aarch32::kRegSizeInBytes) + offset;
 
   if (!options_->absolute_addresses_) {
     literal_addr += begin;
@@ -208,12 +197,14 @@
   // Remove the Thumb specifier bit; no effect if begin does not point to T32 code.
   const uintptr_t instr_ptr = reinterpret_cast<uintptr_t>(begin) & ~1;
 
-  disasm_->SetT32((reinterpret_cast<uintptr_t>(begin) & 1) != 0);
-  disasm_->JumpToPc(GetPc(instr_ptr));
+  const bool is_t32 = (reinterpret_cast<uintptr_t>(begin) & 1) != 0;
+  disasm_->SetCodeAddress(GetPc(instr_ptr));
 
-  if (disasm_->IsT32()) {
+  if (is_t32) {
     const uint16_t* const ip = reinterpret_cast<const uint16_t*>(instr_ptr);
-    next = reinterpret_cast<uintptr_t>(disasm_->DecodeT32At(ip));
+    const uint16_t* const end_address = reinterpret_cast<const uint16_t*>(
+        GetDisassemblerOptions()->end_address_);
+    next = reinterpret_cast<uintptr_t>(disasm_->DecodeT32At(ip, end_address));
   } else {
     const uint32_t* const ip = reinterpret_cast<const uint32_t*>(instr_ptr);
     next = reinterpret_cast<uintptr_t>(disasm_->DecodeA32At(ip));
@@ -230,10 +221,10 @@
   // Remove the Thumb specifier bit; no effect if begin does not point to T32 code.
   const uintptr_t base = reinterpret_cast<uintptr_t>(begin) & ~1;
 
-  disasm_->SetT32((reinterpret_cast<uintptr_t>(begin) & 1) != 0);
-  disasm_->JumpToPc(GetPc(base));
+  const bool is_t32 = (reinterpret_cast<uintptr_t>(begin) & 1) != 0;
+  disasm_->SetCodeAddress(GetPc(base));
 
-  if (disasm_->IsT32()) {
+  if (is_t32) {
     // The Thumb specifier bits cancel each other.
     disasm_->DisassembleT32Buffer(reinterpret_cast<const uint16_t*>(base), end - begin);
   } else {
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index f197fc1..a374686 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -89,7 +89,7 @@
 
   // Return suffix of the file path after the last /. (e.g. /foo/bar -> bar, bar -> bar)
   static std::string BaseName(const std::string& str) {
-    size_t idx = str.rfind("/");
+    size_t idx = str.rfind('/');
     if (idx == std::string::npos) {
       return str;
     }
@@ -516,8 +516,8 @@
 
       // Sanity check that we are reading a real object
       CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-      if (kUseBakerOrBrooksReadBarrier) {
-        obj->AssertReadBarrierPointer();
+      if (kUseBakerReadBarrier) {
+        obj->AssertReadBarrierState();
       }
 
       // Iterate every page this object belongs to
diff --git a/oatdump/Android.bp b/oatdump/Android.bp
index bbe6cc1..f1fcf3d 100644
--- a/oatdump/Android.bp
+++ b/oatdump/Android.bp
@@ -54,13 +54,22 @@
 
 art_cc_binary {
     name: "oatdumps",
-    defaults: ["oatdump-defaults"],
     device_supported: false,
+    static_executable: true,
+    defaults: ["oatdump-defaults"],
     target: {
         darwin: {
             enabled: false,
         },
     },
+    ldflags: [
+        // We need this because GC stress mode makes use of
+        // _Unwind_GetIP and _Unwind_Backtrace and the symbols are also
+        // defined in libgcc_eh.a(unwind-dw2.o)
+        // TODO: Having this is not ideal as it might obscure errors.
+        // Try to get rid of it.
+        "-z muldefs",
+    ],
     static_libs: [
         "libart",
         "libart-compiler",
@@ -72,16 +81,25 @@
 
 art_cc_binary {
     name: "oatdumpds",
+    device_supported: false,
+    static_executable: true,
     defaults: [
         "art_debug_defaults",
         "oatdump-defaults",
     ],
-    device_supported: false,
     target: {
         darwin: {
             enabled: false,
         },
     },
+    ldflags: [
+        // We need this because GC stress mode makes use of
+        // _Unwind_GetIP and _Unwind_Backtrace and the symbols are also
+        // defined in libgcc_eh.a(unwind-dw2.o)
+        // TODO: Having this is not ideal as it might obscure errors.
+        // Try to get rid of it.
+        "-z muldefs",
+    ],
     static_libs: [
         "libartd",
         "libartd-compiler",
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index da0db01..692a951 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -66,6 +66,7 @@
 #include "type_lookup_table.h"
 #include "vdex_file.h"
 #include "verifier/method_verifier.h"
+#include "verifier/verifier_deps.h"
 #include "well_known_classes.h"
 
 #include <sys/stat.h>
@@ -483,6 +484,28 @@
     os << "\n";
 
     if (!options_.dump_header_only_) {
+      VariableIndentationOutputStream vios(&os);
+      VdexFile::Header vdex_header = oat_file_.GetVdexFile()->GetHeader();
+      if (vdex_header.IsValid()) {
+        std::string error_msg;
+        std::vector<const DexFile*> dex_files;
+        for (size_t i = 0; i < oat_dex_files_.size(); i++) {
+          const DexFile* dex_file = OpenDexFile(oat_dex_files_[i], &error_msg);
+          if (dex_file == nullptr) {
+            os << "Error opening dex file: " << error_msg << std::endl;
+            return false;
+          }
+          dex_files.push_back(dex_file);
+        }
+        verifier::VerifierDeps deps(dex_files, oat_file_.GetVdexFile()->GetVerifierDepsData());
+        deps.Dump(&vios);
+      } else {
+        os << "UNRECOGNIZED vdex file, magic "
+           << vdex_header.GetMagic()
+           << ", version "
+           << vdex_header.GetVersion()
+           << "\n";
+      }
       for (size_t i = 0; i < oat_dex_files_.size(); i++) {
         const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
         CHECK(oat_dex_file != nullptr);
@@ -680,13 +703,13 @@
         const Instruction* inst = Instruction::At(code_ptr);
         switch (inst->Opcode()) {
           case Instruction::CONST_STRING: {
-            const uint32_t string_index = inst->VRegB_21c();
+            const dex::StringIndex string_index(inst->VRegB_21c());
             unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
             ++num_string_ids_from_code_;
             break;
           }
           case Instruction::CONST_STRING_JUMBO: {
-            const uint32_t string_index = inst->VRegB_31c();
+            const dex::StringIndex string_index(inst->VRegB_31c());
             unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
             ++num_string_ids_from_code_;
             break;
@@ -771,7 +794,7 @@
       uint32_t oat_class_offset = oat_dex_file.GetOatClassOffset(class_def_index);
       const OatFile::OatClass oat_class = oat_dex_file.GetOatClass(class_def_index);
       os << StringPrintf("%zd: %s (offset=0x%08x) (type_idx=%d)",
-                         class_def_index, descriptor, oat_class_offset, class_def.class_idx_)
+                         class_def_index, descriptor, oat_class_offset, class_def.class_idx_.index_)
          << " (" << oat_class.GetStatus() << ")"
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
@@ -1030,7 +1053,8 @@
       if (options_.absolute_addresses_) {
         vios->Stream() << StringPrintf("%p ", oat_method.GetVmapTable());
       }
-      uint32_t vmap_table_offset = method_header == nullptr ? 0 : method_header->vmap_table_offset_;
+      uint32_t vmap_table_offset = method_header ==
+          nullptr ? 0 : method_header->GetVmapTableOffset();
       vios->Stream() << StringPrintf("(offset=0x%08x)\n", vmap_table_offset);
 
       size_t vmap_table_offset_limit =
@@ -2760,7 +2784,7 @@
 
     bool result = klass->GetImt(pointer_size) == object_class->GetImt(pointer_size);
 
-    if (klass->GetIfTable() == nullptr) {
+    if (klass->GetIfTable()->Count() == 0) {
       DCHECK(result);
     }
 
@@ -2866,25 +2890,23 @@
     std::cerr << " Interfaces:" << std::endl;
     // Run through iftable, find methods that slot here, see if they fit.
     mirror::IfTable* if_table = klass->GetIfTable();
-    if (if_table != nullptr) {
-      for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-        mirror::Class* iface = if_table->GetInterface(i);
-        std::string iface_name;
-        std::cerr << "  " << iface->GetDescriptor(&iface_name) << std::endl;
+    for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+      mirror::Class* iface = if_table->GetInterface(i);
+      std::string iface_name;
+      std::cerr << "  " << iface->GetDescriptor(&iface_name) << std::endl;
 
-        for (ArtMethod& iface_method : iface->GetVirtualMethods(pointer_size)) {
-          uint32_t class_hash, name_hash, signature_hash;
-          ImTable::GetImtHashComponents(&iface_method, &class_hash, &name_hash, &signature_hash);
-          uint32_t imt_slot = ImTable::GetImtIndex(&iface_method);
-          std::cerr << "    " << iface_method.PrettyMethod(true)
-              << " slot=" << imt_slot
-              << std::hex
-              << " class_hash=0x" << class_hash
-              << " name_hash=0x" << name_hash
-              << " signature_hash=0x" << signature_hash
-              << std::dec
-              << std::endl;
-        }
+      for (ArtMethod& iface_method : iface->GetVirtualMethods(pointer_size)) {
+        uint32_t class_hash, name_hash, signature_hash;
+        ImTable::GetImtHashComponents(&iface_method, &class_hash, &name_hash, &signature_hash);
+        uint32_t imt_slot = ImTable::GetImtIndex(&iface_method);
+        std::cerr << "    " << iface_method.PrettyMethod(true)
+            << " slot=" << imt_slot
+            << std::hex
+            << " class_hash=0x" << class_hash
+            << " name_hash=0x" << name_hash
+            << " signature_hash=0x" << signature_hash
+            << std::dec
+            << std::endl;
       }
     }
   }
@@ -2949,18 +2971,16 @@
         } else {
           // Run through iftable, find methods that slot here, see if they fit.
           mirror::IfTable* if_table = klass->GetIfTable();
-          if (if_table != nullptr) {
-            for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-              mirror::Class* iface = if_table->GetInterface(i);
-              size_t num_methods = iface->NumDeclaredVirtualMethods();
-              if (num_methods > 0) {
-                for (ArtMethod& iface_method : iface->GetMethods(pointer_size)) {
-                  if (ImTable::GetImtIndex(&iface_method) == index) {
-                    std::string i_name = iface_method.PrettyMethod(true);
-                    if (StartsWith(i_name, method.c_str())) {
-                      std::cerr << "  Slot " << index << " (1)" << std::endl;
-                      std::cerr << "    " << p_name << " (" << i_name << ")" << std::endl;
-                    }
+          for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+            mirror::Class* iface = if_table->GetInterface(i);
+            size_t num_methods = iface->NumDeclaredVirtualMethods();
+            if (num_methods > 0) {
+              for (ArtMethod& iface_method : iface->GetMethods(pointer_size)) {
+                if (ImTable::GetImtIndex(&iface_method) == index) {
+                  std::string i_name = iface_method.PrettyMethod(true);
+                  if (StartsWith(i_name, method.c_str())) {
+                    std::cerr << "  Slot " << index << " (1)" << std::endl;
+                    std::cerr << "    " << p_name << " (" << i_name << ")" << std::endl;
                   }
                 }
               }
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 986f265..db28a3f 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -498,7 +498,7 @@
   return true;
 }
 
-class PatchOatArtFieldVisitor : public ArtFieldVisitor {
+class PatchOat::PatchOatArtFieldVisitor : public ArtFieldVisitor {
  public:
   explicit PatchOatArtFieldVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
 
@@ -517,7 +517,7 @@
   image_header->VisitPackedArtFields(&visitor, heap_->Begin());
 }
 
-class PatchOatArtMethodVisitor : public ArtMethodVisitor {
+class PatchOat::PatchOatArtMethodVisitor : public ArtMethodVisitor {
  public:
   explicit PatchOatArtMethodVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
 
@@ -558,7 +558,7 @@
       pointer_size);
 }
 
-class FixupRootVisitor : public RootVisitor {
+class PatchOat::FixupRootVisitor : public RootVisitor {
  public:
   explicit FixupRootVisitor(const PatchOat* patch_oat) : patch_oat_(patch_oat) {
   }
@@ -610,7 +610,7 @@
 }
 
 
-class RelocatedPointerVisitor {
+class PatchOat::RelocatedPointerVisitor {
  public:
   explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
 
@@ -747,13 +747,8 @@
 void PatchOat::VisitObject(mirror::Object* object) {
   mirror::Object* copy = RelocatedCopyOf(object);
   CHECK(copy != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    object->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      mirror::Object* moved_to = RelocatedAddressOfPointer(object);
-      copy->SetReadBarrierPointer(moved_to);
-      DCHECK_EQ(copy->GetReadBarrierPointer(), moved_to);
-    }
+  if (kUseBakerReadBarrier) {
+    object->AssertReadBarrierState();
   }
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<kVerifyNone>(visitor, visitor);
@@ -767,16 +762,14 @@
     if (vtable != nullptr) {
       vtable->Fixup(RelocatedCopyOfFollowImages(vtable), pointer_size, native_visitor);
     }
-    auto* iftable = klass->GetIfTable();
-    if (iftable != nullptr) {
-      for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-        if (iftable->GetMethodArrayCount(i) > 0) {
-          auto* method_array = iftable->GetMethodArray(i);
-          CHECK(method_array != nullptr);
-          method_array->Fixup(RelocatedCopyOfFollowImages(method_array),
-                              pointer_size,
-                              native_visitor);
-        }
+    mirror::IfTable* iftable = klass->GetIfTable();
+    for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+      if (iftable->GetMethodArrayCount(i) > 0) {
+        auto* method_array = iftable->GetMethodArray(i);
+        CHECK(method_array != nullptr);
+        method_array->Fixup(RelocatedCopyOfFollowImages(method_array),
+                            pointer_size,
+                            native_visitor);
       }
     }
   } else if (object->GetClass() == mirror::Method::StaticClass() ||
@@ -1073,7 +1066,7 @@
   TimingLogger::ScopedTiming pt("patch image and oat", &timings);
 
   std::string output_directory =
-      output_image_filename.substr(0, output_image_filename.find_last_of("/"));
+      output_image_filename.substr(0, output_image_filename.find_last_of('/'));
   bool ret = PatchOat::Patch(input_image_location, base_delta, output_directory, isa, &timings);
 
   if (kIsDebugBuild) {
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index e7a3e91..a519631 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -230,10 +230,11 @@
 
   TimingLogger* timings_;
 
-  friend class FixupRootVisitor;
-  friend class RelocatedPointerVisitor;
-  friend class PatchOatArtFieldVisitor;
-  friend class PatchOatArtMethodVisitor;
+  class FixupRootVisitor;
+  class RelocatedPointerVisitor;
+  class PatchOatArtFieldVisitor;
+  class PatchOatArtMethodVisitor;
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat);
 };
 
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index cd0aa6f..776c31a 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -42,7 +42,7 @@
       ASSERT_TRUE(info->AddMethodIndex(dex_location2, dex_location_checksum2, i));
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
-      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, dex::TypeIndex(i)));
     }
 
     ASSERT_TRUE(info->Save(GetFd(profile)));
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 8f961af..08be5b2 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -45,6 +45,7 @@
         "base/timing_logger.cc",
         "base/unix_file/fd_file.cc",
         "base/unix_file/random_access_file_utils.cc",
+        "cha.cc",
         "check_jni.cc",
         "class_linker.cc",
         "class_table.cc",
@@ -120,8 +121,10 @@
         "linear_alloc.cc",
         "mem_map.cc",
         "memory_region.cc",
+        "method_handles.cc",
         "mirror/array.cc",
         "mirror/class.cc",
+        "mirror/class_ext.cc",
         "mirror/dex_cache.cc",
         "mirror/emulated_stack_frame.cc",
         "mirror/executable.cc",
@@ -404,7 +407,8 @@
 
 gensrcs {
     name: "art_operator_srcs",
-    cmd: "art/tools/generate-operator-out.py art/runtime $in > $out",
+    cmd: "$(location generate-operator-out.py) art/runtime $(in) > $(out)",
+    tool_files: ["generate-operator-out.py"],
     srcs: [
         "arch/instruction_set.h",
         "base/allocator.h",
@@ -511,6 +515,7 @@
         "base/transform_iterator_test.cc",
         "base/variant_map_test.cc",
         "base/unix_file/fd_file_test.cc",
+        "cha_test.cc",
         "class_linker_test.cc",
         "compiler_filter_test.cc",
         "dex_file_test.cc",
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index cb8edff..de72d3a 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses an non-standard calling
@@ -68,12 +67,27 @@
 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   qpoints->pIdivmod = __aeabi_idivmod;
@@ -124,18 +138,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
-  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
-  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
-  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
-  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
-  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierMarkReg12 = nullptr;  // Cannot use register 12 (IP) to pass arguments.
   qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
   qpoints->pReadBarrierMarkReg14 = nullptr;  // Cannot use register 14 (LR) to pass arguments.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bf70c55..a71ab4b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -764,11 +764,12 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
-     * artThrowClassCastException.
+     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
+     * artThrowClassCastExceptionForObject.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     push {r0-r1, lr}                    @ save arguments, link register and pad
     .cfi_adjust_cfa_offset 12
     .cfi_rel_offset r0, 0
@@ -776,7 +777,7 @@
     .cfi_rel_offset lr, 8
     sub sp, #4
     .cfi_adjust_cfa_offset 4
-    bl artIsAssignableFromCode
+    bl artInstanceOfFromCode
     cbz    r0, .Lthrow_class_cast_exception
     add sp, #4
     .cfi_adjust_cfa_offset -4
@@ -792,9 +793,9 @@
     .cfi_restore lr
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
-    bl  artThrowClassCastException  @ (Class*, Class*, Thread*)
+    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
     bkpt
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 // Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
 .macro POP_REG_NE rReg, offset, rExclude
@@ -1108,62 +1109,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    push   {r10-r12, lr}
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset r10, 0
-    .cfi_rel_offset r11, 4
-    .cfi_rel_offset ip, 8
-    .cfi_rel_offset lr, 12
-    ldr    r10, [sp, #16]                                        @ load referrer
-    ldr    r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET]        @ load declaring class
-    ldr    r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
-    ubfx   r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
-    add    r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
-    ldrd   r10, r11, [r10]                               @ load index into r11 and pointer into r10
-    cmp    r0, r11
-    bne    .Lart_quick_resolve_string_slow_path
-#ifdef USE_READ_BARRIER
-    ldr    r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   r0, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    mov    r0, r10
-    pop    {r10-r12, pc}
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr    r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsrs   r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1)
-    bcs    .Lart_quick_resolve_string_no_rb
-    mov    r0, r10
-    .cfi_remember_state
-    pop    {r10-r12, lr}
-    .cfi_adjust_cfa_offset -16
-    .cfi_restore r10
-    .cfi_restore r11
-    .cfi_restore r12
-    .cfi_restore lr
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not.
-    b      .Lslow_rb_art_quick_read_barrier_mark_reg00  @ Get the marked string back.
-    .cfi_restore_state
-#endif
-
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
-    push {r0-r9}                  @ 10 words of callee saves and args; {r10-r12, lr} already saved.
-    .cfi_adjust_cfa_offset 40
-    .cfi_rel_offset r0, 0
-    .cfi_rel_offset r1, 4
-    .cfi_rel_offset r2, 8
-    .cfi_rel_offset r3, 12
-    .cfi_rel_offset r4, 16
-    .cfi_rel_offset r5, 20
-    .cfi_rel_offset r6, 24
-    .cfi_rel_offset r7, 28
-    .cfi_rel_offset r8, 32
-    .cfi_rel_offset r9, 36
-    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1   @ save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME r1                   @ save everything in case of GC
     mov    r1, r9                                    @ pass Thread::Current
     bl     artResolveStringFromCode                  @ (uint32_t type_idx, Thread*)
     cbz    r0, 1f                                    @ If result is null, deliver the OOME.
@@ -1768,12 +1714,15 @@
     .cfi_rel_offset r10, 4
     .cfi_rel_offset r11, 8
     .cfi_rel_offset lr, 12
+#if (STRING_COMPRESSION_FEATURE)
+    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
+#else
     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
+#endif
     add   r0, #MIRROR_STRING_VALUE_OFFSET
 #if (STRING_COMPRESSION_FEATURE)
     /* r4 count (with flag) and r3 holds actual length */
-    mov   r4, r3
-    bic   r3, #2147483648
+    lsr   r3, r4, #1
 #endif
     /* Clamp start to [0..count] */
     cmp   r2, #0
@@ -1788,8 +1737,8 @@
 
     /* Build pointer to start of data to compare and pre-bias */
 #if (STRING_COMPRESSION_FEATURE)
-    cmp   r4, #0
-    blt   .Lstring_indexof_compressed
+    lsrs  r4, r4, #1
+    bcc   .Lstring_indexof_compressed
 #endif
     add   r0, r0, r2, lsl #1
     sub   r0, #2
@@ -1999,11 +1948,17 @@
     // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
     ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
     tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
-    beq .Lslow_rb_\name
+    beq .Lnot_marked_rb_\name
     // Already marked, return right away.
 .Lret_rb_\name:
     bx lr
 
+.Lnot_marked_rb_\name:
+    // Test that both the forwarding state bits are 1.
+    mvn ip, ip
+    tst ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    beq .Lret_forwarding_address\name
+
 .Lslow_rb_\name:
     // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here.
     push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
@@ -2064,6 +2019,12 @@
     .cfi_restore ip
     .cfi_restore lr
     bx lr
+.Lret_forwarding_address\name:
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    mvn ip, ip
+    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    bx lr
 END \name
 .endm
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index c2078f0..6add107 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses an non-standard calling
@@ -71,12 +70,53 @@
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg28(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  // Sets each ReadBarrierMarkRegX entrypoint to its assembly stub when
+  // is_marking is true, and to nullptr otherwise.  ARM64 is the
+  // architecture with the most core registers (32) that supports the
+  // read barrier configuration.  Because registers 30 (LR) and 31
+  // (SP/XZR) cannot be used to pass arguments, entrypoints are only
+  // defined for the first 30 registers.  Other supported architectures
+  // (ARM, x86 and x86-64) have fewer core registers (resp. 16, 8 and
+  // 16).  Register 16 is not assigned here.  (This design may need
+  // revisiting if read barrier support is added for MIPS/MIPS64.)
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg23 = is_marking ? art_quick_read_barrier_mark_reg23 : nullptr;
+  qpoints->pReadBarrierMarkReg24 = is_marking ? art_quick_read_barrier_mark_reg24 : nullptr;
+  qpoints->pReadBarrierMarkReg25 = is_marking ? art_quick_read_barrier_mark_reg25 : nullptr;
+  qpoints->pReadBarrierMarkReg26 = is_marking ? art_quick_read_barrier_mark_reg26 : nullptr;
+  qpoints->pReadBarrierMarkReg27 = is_marking ? art_quick_read_barrier_mark_reg27 : nullptr;
+  qpoints->pReadBarrierMarkReg28 = is_marking ? art_quick_read_barrier_mark_reg28 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   // TODO null entrypoints not needed for ARM64 - generate inline.
@@ -127,45 +167,8 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  // ARM64 is the architecture with the largest number of core
-  // registers (32) that supports the read barrier configuration.
-  // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
-  // arguments, only define ReadBarrierMarkRegX entrypoints for the
-  // first 30 registers.  This limitation is not a problem on other
-  // supported architectures (ARM, x86 and x86-64) either, as they
-  // have less core registers (resp. 16, 8 and 16).  (We may have to
-  // revise that design choice if read barrier support is added for
-  // MIPS and/or MIPS64.)
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
-  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
-  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
-  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
-  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
-  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
-  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
-  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
-  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
-  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
   qpoints->pReadBarrierMarkReg16 = nullptr;  // IP0 is used as a temp by the asm stub.
-  qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17;
-  qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18;
-  qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19;
-  qpoints->pReadBarrierMarkReg20 = art_quick_read_barrier_mark_reg20;
-  qpoints->pReadBarrierMarkReg21 = art_quick_read_barrier_mark_reg21;
-  qpoints->pReadBarrierMarkReg22 = art_quick_read_barrier_mark_reg22;
-  qpoints->pReadBarrierMarkReg23 = art_quick_read_barrier_mark_reg23;
-  qpoints->pReadBarrierMarkReg24 = art_quick_read_barrier_mark_reg24;
-  qpoints->pReadBarrierMarkReg25 = art_quick_read_barrier_mark_reg25;
-  qpoints->pReadBarrierMarkReg26 = art_quick_read_barrier_mark_reg26;
-  qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
-  qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
-  qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 483cee3..b88515f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1294,18 +1294,19 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
-     * artThrowClassCastException.
+     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
+     * artThrowClassCastExceptionForObject.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     // Store arguments and link register
     // Stack needs to be 16B aligned on calls.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
     SAVE_REG xLR, 24
 
     // Call runtime code
-    bl artIsAssignableFromCode
+    bl artInstanceOfFromCode
 
     // Check for exception
     cbz x0, .Lthrow_class_cast_exception
@@ -1316,6 +1317,7 @@
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
     ret
     .cfi_restore_state                // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598
 
 .Lthrow_class_cast_exception:
     // Restore
@@ -1324,9 +1326,9 @@
 
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
-    bl artThrowClassCastException     // (Class*, Class*, Thread*)
+    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
     brk 0                             // We should not return here...
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 // Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
 .macro POP_REG_NE xReg, offset, xExclude
@@ -1483,6 +1485,7 @@
     strb w3, [x3, x0]
     ret
     .cfi_restore_state            // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
 .Lthrow_array_store_exception:
     RESTORE_TWO_REGS x2, xLR, 16
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
@@ -1650,44 +1653,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ldr   x29, [sp, #(2 * __SIZEOF_POINTER__)]                   // load referrer
-    ldr   w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET]         // load declaring class
-    ldr   x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]  // load string dex cache
-    ubfx  lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS                // get masked string index into LR
-    ldr   x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x29
-    cmp   x0, x29, lsr #32                                       // compare against upper 32 bits
-    bne   .Lart_quick_resolve_string_slow_path
-    ubfx  x0, x29, #0, #32                                       // extract lower 32 bits into x0
-#ifdef USE_READ_BARRIER
-    // Most common case: GC is not marking.
-    ldr    w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x29, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ret
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr   x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbnz  x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not.
-    b     .Lslow_rb_art_quick_read_barrier_mark_reg00  // Get the marked string back.
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-#endif
-
-// Slow path case, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR  // save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME                     // save everything for stack crawl
     mov   x1, xSELF                                 // pass Thread::Current
     bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
     cbz   w0, 1f                                    // If result is null, deliver the OOME.
@@ -2402,12 +2368,15 @@
      *    w2:   Starting offset in string data
      */
 ENTRY art_quick_indexof
+#if (STRING_COMPRESSION_FEATURE)
+    ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
+#else
     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
+#endif
     add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
 #if (STRING_COMPRESSION_FEATURE)
     /* w4 holds count (with flag) and w3 holds actual length */
-    mov   w4, w3
-    and   w3, w3, #2147483647
+    lsr   w3, w4, #1
 #endif
     /* Clamp start to [0..count] */
     cmp   w2, #0
@@ -2419,7 +2388,7 @@
     mov   x5, x0
 
 #if (STRING_COMPRESSION_FEATURE)
-    tbnz  w4, #31, .Lstring_indexof_compressed
+    tbz   w4, #0, .Lstring_indexof_compressed
 #endif
     /* Build pointer to start of data to compare and pre-bias */
     add   x0, x0, x2, lsl #1
@@ -2539,10 +2508,17 @@
      */
     // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
     ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_rb_\name
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
 .Lret_rb_\name:
     ret
+.Lnot_marked_rb_\name:
+    // Check if the top two bits are one, if this is the case it is a forwarding address.
+    mvn wIP0, wIP0
+    cmp wzr, wIP0, lsr #30
+    beq .Lret_forwarding_address\name
 .Lslow_rb_\name:
+    // We must not clobber IP0 since art_quick_resolve_string makes a tail call here and relies on
+    // IP0 being restored.
     // Save all potentially live caller-save core registers.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368
     SAVE_TWO_REGS  x2,  x3, 16
@@ -2608,6 +2584,12 @@
     RESTORE_REG xLR, 360
     DECREASE_FRAME 368
     ret
+.Lret_forwarding_address\name:
+    mvn wIP0, wIP0
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    ret
 END \name
 .endm
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index e10d4e6..5c56923 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
@@ -60,6 +59,10 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
+// No read barrier entrypoints for marking registers.
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
+                                  bool is_marking ATTRIBUTE_UNUSED) {}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // Note: MIPS has asserts checking for the type of entrypoint. Don't move it
   //       to InitDefaultEntryPoints().
@@ -68,13 +71,13 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
   static_assert(IsDirectEntrypoint(kQuickInstanceofNonTrivial), "Direct C stub not marked direct.");
-  qpoints->pCheckCast = art_quick_check_cast;
-  static_assert(!IsDirectEntrypoint(kQuickCheckCast), "Non-direct C stub marked direct.");
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
+  static_assert(!IsDirectEntrypoint(kQuickCheckInstanceOf), "Non-direct C stub marked direct.");
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
@@ -153,17 +156,24 @@
   // JNI
   qpoints->pJniMethodStart = JniMethodStart;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodStart), "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastStart = JniMethodFastStart;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastStart), "Non-direct C stub marked direct.");
   qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodStartSynchronized),
                 "Non-direct C stub marked direct.");
   qpoints->pJniMethodEnd = JniMethodEnd;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEnd), "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastEnd = JniMethodFastEnd;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastEnd), "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndSynchronized),
                 "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndWithReference),
                 "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastEndWithReference),
+                "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndWithReferenceSynchronized),
                 "Non-direct C stub marked direct.");
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c3c1882..34e34b4 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1171,10 +1171,11 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     addiu  $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sw     $gp, 16($sp)
@@ -1183,7 +1184,7 @@
     sw     $t9, 8($sp)
     sw     $a1, 4($sp)
     sw     $a0, 0($sp)
-    la     $t9, artIsAssignableFromCode
+    la     $t9, artInstanceOfFromCode
     jalr   $t9
     addiu  $sp, $sp, -16             # reserve argument slots on the stack
     addiu  $sp, $sp, 16
@@ -1200,10 +1201,10 @@
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    la   $t9, artThrowClassCastException
-    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
+    la   $t9, artThrowClassCastExceptionForObject
+    jalr $zero, $t9                 # artThrowClassCastExceptionForObject (Object*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
-END art_quick_check_cast
+END art_quick_check_instance_of
 
     /*
      * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index a037905..bc17d47 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -30,8 +30,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -59,12 +59,16 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
+// No read barrier entrypoints for marking registers.
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
+                                  bool is_marking ATTRIBUTE_UNUSED) {}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   qpoints->pCmpgDouble = CmpgDouble;
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index cb2d1c8..0861d2d 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1256,10 +1256,11 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     daddiu $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sd     $ra, 24($sp)
@@ -1267,7 +1268,7 @@
     sd     $t9, 16($sp)
     sd     $a1, 8($sp)
     sd     $a0, 0($sp)
-    jal    artIsAssignableFromCode
+    jal    artInstanceOfFromCode
     .cpreturn                       # Restore gp from t8 in branch delay slot.
                                     # t8 may be clobbered in artIsAssignableFromCode.
     beq    $v0, $zero, .Lthrow_class_cast_exception
@@ -1283,10 +1284,10 @@
     .cfi_adjust_cfa_offset -32
     SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    dla  $t9, artThrowClassCastException
-    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
+    dla  $t9, artThrowClassCastExceptionForObject
+    jalr $zero, $t9                 # artThrowClassCastExceptionForObject (Object*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 
     /*
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fa86bf4..db2fdca 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -107,7 +107,28 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 .endm
 
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
+.endm
+
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -187,20 +208,6 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 4638c3f..6665897 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -23,6 +23,7 @@
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "imt_conflict_table.h"
+#include "jni_internal.h"
 #include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
@@ -805,7 +806,7 @@
 
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
-extern "C" void art_quick_check_cast(void);
+extern "C" void art_quick_check_instance_of(void);
 #endif
 
 TEST_F(StubTest, CheckCast) {
@@ -813,40 +814,90 @@
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
-  const uintptr_t art_quick_check_cast = StubTest::GetEntrypoint(self, kQuickCheckCast);
+  const uintptr_t art_quick_check_instance_of =
+      StubTest::GetEntrypoint(self, kQuickCheckInstanceOf);
 
   // Find some classes.
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  StackHandleScope<2> hs(soa.Self());
-  Handle<mirror::Class> c(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
-  Handle<mirror::Class> c2(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;")));
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::Class> klass_obj(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  Handle<mirror::Class> klass_str(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/String;")));
+  Handle<mirror::Class> klass_list(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/util/List;")));
+  Handle<mirror::Class> klass_cloneable(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Cloneable;")));
+  Handle<mirror::Class> klass_array_list(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/util/ArrayList;")));
+  Handle<mirror::Object> obj(hs.NewHandle(klass_obj->AllocObject(soa.Self())));
+  Handle<mirror::String> string(hs.NewHandle(
+      mirror::String::AllocFromModifiedUtf8(soa.Self(), "ABCD")));
+  Handle<mirror::Object> array_list(hs.NewHandle(klass_array_list->AllocObject(soa.Self())));
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(c.Get()), 0U,
-          art_quick_check_cast, self);
-
+  Invoke3(reinterpret_cast<size_t>(obj.Get()),
+          reinterpret_cast<size_t>(klass_obj.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c2.Get()), reinterpret_cast<size_t>(c2.Get()), 0U,
-          art_quick_check_cast, self);
-
+  // Expected true: Test string instance of java.lang.String.
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_str.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(c2.Get()), 0U,
-          art_quick_check_cast, self);
-
+  // Expected true: Test string instance of java.lang.Object.
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_obj.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  // TODO: Make the following work. But that would require correct managed frames.
+  // Expected false: Test object instance of java.lang.String.
+  Invoke3(reinterpret_cast<size_t>(obj.Get()),
+          reinterpret_cast<size_t>(klass_str.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
 
-  Invoke3(reinterpret_cast<size_t>(c2.Get()), reinterpret_cast<size_t>(c.Get()), 0U,
-          art_quick_check_cast, self);
+  Invoke3(reinterpret_cast<size_t>(array_list.Get()),
+          reinterpret_cast<size_t>(klass_list.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_FALSE(self->IsExceptionPending());
 
+  Invoke3(reinterpret_cast<size_t>(array_list.Get()),
+          reinterpret_cast<size_t>(klass_cloneable.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_array_list.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
+
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_cloneable.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_TRUE(self->IsExceptionPending());
   self->ClearException();
 
@@ -1012,7 +1063,7 @@
   EXPECT_FALSE(self->IsExceptionPending());
   {
     // Use an arbitrary method from c to use as referrer
-    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
+    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex().index_),    // type_idx
                             // arbitrary
                             reinterpret_cast<size_t>(c->GetVirtualMethod(0, kRuntimePointerSize)),
                             0U,
@@ -1146,7 +1197,7 @@
   if ((false)) {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(
-        static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
+        static_cast<size_t>(c->GetDexTypeIndex().index_),    // type_idx
         10U,
         // arbitrary
         reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0, kRuntimePointerSize)),
@@ -1964,7 +2015,7 @@
   ASSERT_NE(nullptr, add_jmethod);
 
   // Get representation.
-  ArtMethod* contains_amethod = soa.DecodeMethod(contains_jmethod);
+  ArtMethod* contains_amethod = jni::DecodeArtMethod(contains_jmethod);
 
   // Patch up ArrayList.contains.
   if (contains_amethod->GetEntryPointFromQuickCompiledCode() == nullptr) {
@@ -1982,7 +2033,7 @@
   ASSERT_NE(nullptr, inf_contains_jmethod);
 
   // Get mirror representation.
-  ArtMethod* inf_contains = soa.DecodeMethod(inf_contains_jmethod);
+  ArtMethod* inf_contains = jni::DecodeArtMethod(inf_contains_jmethod);
 
   // Object
 
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 077d2db..cb3dfec 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -17,6 +17,7 @@
 #include "context_x86.h"
 
 #include "base/bit_utils.h"
+#include "base/memory_tool.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
@@ -102,6 +103,7 @@
   uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - sizeof(intptr_t);
   gprs[kNumberOfCpuRegisters] = esp;
   *(reinterpret_cast<uintptr_t*>(esp)) = eip_;
+  MEMORY_TOOL_HANDLE_NO_RETURN;
   __asm__ __volatile__(
       "movl %1, %%ebx\n\t"          // Address base of FPRs.
       "movsd 0(%%ebx), %%xmm0\n\t"  // Load up XMM0-XMM7.
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 0a10a3c..9cd4a3e 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -27,8 +27,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t art_quick_instance_of(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
-// art_quick_read_barrier_mark_regX uses an non-standard calling
+// art_quick_read_barrier_mark_regX uses a non-standard calling
@@ -45,12 +44,22 @@
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = art_quick_instance_of;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // More math.
   qpoints->pCos = cos;
@@ -88,14 +97,8 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
   // x86 has only 8 core registers.
   qpoints->pReadBarrierMarkReg08 = nullptr;
   qpoints->pReadBarrierMarkReg09 = nullptr;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index f4f9a68..c6f4c03 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1085,15 +1085,12 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called
+// for CC if the GC is not marking.
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
     // EBX, EDX: free.
-#if defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
     PUSH esi
     PUSH edi
     movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
@@ -1151,51 +1148,17 @@
 END_FUNCTION art_quick_alloc_object_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    PUSH edi
-    PUSH esi
-    // Save xmm0 at an aligned address on the stack.
-    subl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(12)
-    movsd %xmm0, 0(%esp)
-    movl 24(%esp), %edi                                          // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi           // get declaring class
-    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi    // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
-    andl %eax, %esi
-    movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0    // load string idx and ptr to xmm0
-    movd %xmm0, %edi                                             // extract pointer
-    pshufd LITERAL(0x55), %xmm0, %xmm0                           // shuffle index into lowest bits
-    movd %xmm0, %esi                                             // extract index
-    // Restore xmm0 and remove it together with padding from the stack.
-    movsd 0(%esp), %xmm0
-    addl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(-12)
-    cmp %esi, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %edi, %eax
-    CFI_REMEMBER_STATE
-    POP esi
-    POP edi
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(esp, 24)                          // workaround for clang bug: 31975598
-
-.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
     // Outgoing argument set up
-    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
-    subl LITERAL(8), %esp                                        // push padding
+    subl LITERAL(8), %esp                                 // push padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                                                     // pass arg1
+    PUSH eax                                              // pass arg1
     call SYMBOL(artResolveStringFromCode)
-    addl LITERAL(16), %esp                                       // pop arguments
+    addl LITERAL(16), %esp                                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    testl %eax, %eax                                        // If result is null, deliver the OOME.
+    testl %eax, %eax                                      // If result is null, deliver the OOME.
     jz 1f
     CFI_REMEMBER_STATE
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
@@ -1351,21 +1314,21 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-DEFINE_FUNCTION art_quick_is_assignable
+DEFINE_FUNCTION art_quick_instance_of
     PUSH eax                              // alignment padding
-    PUSH ecx                              // pass arg2 - obj->klass
-    PUSH eax                              // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    PUSH ecx                              // pass arg2 - checked class
+    PUSH eax                              // pass arg1 - obj
+    call SYMBOL(artInstanceOfFromCode)    // (Object* obj, Class* ref_klass)
     addl LITERAL(12), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
-END_FUNCTION art_quick_is_assignable
+END_FUNCTION art_quick_instance_of
 
-DEFINE_FUNCTION art_quick_check_cast
+DEFINE_FUNCTION art_quick_check_instance_of
     PUSH eax                              // alignment padding
-    PUSH ecx                              // pass arg2 - obj->klass
-    PUSH eax                              // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    PUSH ecx                              // pass arg2 - checked class
+    PUSH eax                              // pass arg1 - obj
+    call SYMBOL(artInstanceOfFromCode)    // (Object* obj, Class* ref_klass)
     testl %eax, %eax
     jz 1f                                 // jump forward if not assignable
     addl LITERAL(12), %esp                // pop arguments
@@ -1385,9 +1348,9 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                              // pass arg2
     PUSH eax                              // pass arg1
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
+    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
     UNREACHABLE
-END_FUNCTION art_quick_check_cast
+END_FUNCTION art_quick_check_instance_of
 
 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
 MACRO2(POP_REG_NE, reg, exclude_reg)
@@ -2035,15 +1998,14 @@
     lea MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
 #if (STRING_COMPRESSION_FEATURE)
     /* Differ cases */
-    cmpl    LITERAL(0), %edx
-    jl      .Lstring_compareto_this_is_compressed
-    cmpl    LITERAL(0), %ebx
-    jl      .Lstring_compareto_that_is_compressed
+    shrl    LITERAL(1), %edx
+    jnc     .Lstring_compareto_this_is_compressed
+    shrl    LITERAL(1), %ebx
+    jnc     .Lstring_compareto_that_is_compressed
     jmp     .Lstring_compareto_both_not_compressed
 .Lstring_compareto_this_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %edx
-    cmpl    LITERAL(0), %ebx
-    jl      .Lstring_compareto_both_compressed
+    shrl    LITERAL(1), %ebx
+    jnc     .Lstring_compareto_both_compressed
     /* If (this->IsCompressed() && that->IsCompressed() == false) */
     mov     %edx, %eax
     subl    %ebx, %eax
@@ -2061,7 +2023,6 @@
     cmovne  %edx, %eax                        // return eax = *(this_cur_char) - *(that_cur_char)
     jmp     .Lstring_compareto_return
 .Lstring_compareto_that_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %ebx
     mov     %edx, %eax
     subl    %ebx, %eax
     mov     %edx, %ecx
@@ -2078,7 +2039,6 @@
     cmovne  %edx, %eax
     jmp     .Lstring_compareto_return         // return eax = *(this_cur_char) - *(that_cur_char)
 .Lstring_compareto_both_compressed:
-    andl    LITERAL(0x7FFFFFFF), %ebx
     /* Calculate min length and count diff */
     mov     %edx, %ecx
     mov     %edx, %eax
@@ -2155,8 +2115,15 @@
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH eax
+    mov MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax  // EAX (saved above) is scratch here.
+    add LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if overflow, the only case where it overflows should be the forwarding address one.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name  // jnae tests CF, i.e. unsigned wrap-around of the add.
+
+    // Save all potentially live caller-save core registers.
+    mov 0(%esp), %eax  // Reload EAX from its stack slot; the lock word check clobbered it.
     PUSH ecx
     PUSH edx
     PUSH ebx
@@ -2204,6 +2171,12 @@
     POP_REG_NE eax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    mov %eax, REG_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
 
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 8c425d5..a326b4e 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
-                                                 const mirror::Class* ref_class);
+extern "C" size_t art_quick_instance_of(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
-// art_quick_read_barrier_mark_regX uses an non-standard calling
+// art_quick_read_barrier_mark_regX uses a non-standard calling
@@ -56,6 +55,24 @@
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
   UNUSED(jpoints, qpoints);
@@ -64,8 +81,8 @@
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = art_quick_instance_of;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // More math.
   qpoints->pCos = cos;
@@ -102,22 +119,8 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
-  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
-  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
-  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
-  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
-  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
-  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
-  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
-  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
   // x86-64 has only 16 core registers.
   qpoints->pReadBarrierMarkReg16 = nullptr;
   qpoints->pReadBarrierMarkReg17 = nullptr;
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index afa1c0f..4c46b08 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -18,6 +18,13 @@
 
 #include "arch/quick_alloc_entrypoints.S"
 
+MACRO0(ASSERT_USE_READ_BARRIER)
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+END_MACRO
+
 MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
     // Create space for ART FP callee-saved registers
     subq MACRO_LITERAL(4 * 8), %rsp
@@ -972,8 +979,10 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+
 // Comment out allocators that have x86_64 specific asm.
+// Region TLAB:
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
@@ -986,6 +995,19 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+// Normal TLAB:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 DEFINE_FUNCTION art_quick_alloc_object_rosalloc
@@ -1162,16 +1184,11 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be
+// called with CC if the GC is not marking.
 DEFINE_FUNCTION art_quick_alloc_object_tlab
-    // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
-    // Might need a special macro since rsi and edx is 32b/64b mismatched.
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
     // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
@@ -1181,29 +1198,69 @@
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
 END_FUNCTION art_quick_alloc_object_tlab
 
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
+// called with CC if the GC is not marking.
+DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq %rdi, %rdx
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
+.Lart_quick_alloc_object_resolved_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
+END_FUNCTION art_quick_alloc_object_resolved_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
+// May be called with CC if the GC is not marking.
+DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq %rdi, %rdx
+    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
+.Lart_quick_alloc_object_initialized_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
+END_FUNCTION art_quick_alloc_object_initialized_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_array_tlab
+    // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: klass, R8, R9: free. RAX: return val.
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
+    movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
+    testl %ecx, %ecx
+    jz .Lart_quick_alloc_array_tlab_slow_path
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_tlab_slow_path
+.Lart_quick_alloc_array_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeTLAB
+END_FUNCTION art_quick_alloc_array_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_array_resolved_tlab
+    // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
+    movq %rdi, %rcx
+    // Already resolved, no null check.
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_tlab_slow_path
+.Lart_quick_alloc_array_resolved_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedTLAB
+END_FUNCTION art_quick_alloc_array_resolved_tlab
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB).
 DEFINE_FUNCTION art_quick_alloc_array_region_tlab
     // Fast path region tlab allocation.
     // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
     // RCX: klass, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
     movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
     // Null check so that we can load the lock word.
     testl %ecx, %ecx
     jz .Lart_quick_alloc_array_region_tlab_slow_path
-
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking
+    // Since we have allocation entrypoint switching, we know the GC is marking.
+    // Check the mark bit, if it is 0, do the read barrier mark.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path
 .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path
-.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 return.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
-    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path:
     // The read barrier slow path. Mark the class.
     PUSH rdi
@@ -1226,33 +1283,11 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
     // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq %rdi, %rcx
+    // Caller is responsible for read barrier.
     // Already resolved, no null check.
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path
-.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 return.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
-    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
-.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path:
-    // The read barrier slow path. Mark the class.
-    PUSH rdi
-    PUSH rsi
-    PUSH rdx
-    // Outgoing argument set up
-    movq %rcx, %rdi                                            // Pass the class as the first param.
-    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
-    movq %rax, %rcx
-    POP rdx
-    POP rsi
-    POP rdi
-    jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_array_resolved_region_tlab_slow_path:
     ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
 END_FUNCTION art_quick_alloc_array_resolved_region_tlab
@@ -1262,24 +1297,19 @@
     // Fast path region tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx    // Load the class
     // Null check so that we can load the lock word.
     testl %edx, %edx
     jz .Lart_quick_alloc_object_region_tlab_slow_path
-    // Test if the GC is marking.
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
-    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 avoid the read barrier.
+    // Since we have allocation entrypoint switching, we know the GC is marking.
+    // Check the mark bit, if it is 0, do the read barrier mark.
     testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
-    jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    // Use resolved one since we already did the null check.
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
     // The read barrier slow path. Mark the class.
     PUSH rdi
@@ -1302,10 +1332,7 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     // No read barrier since the caller is responsible for that.
     movq %rdi, %rdx
     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
@@ -1318,10 +1345,7 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq %rdi, %rdx
     // No read barrier since the caller is responsible for that.
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
@@ -1330,34 +1354,7 @@
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    // Custom calling convention: RAX serves as both input and output.
-    PUSH r15
-    PUSH r14
-    movq 24(%rsp), %r15                                         // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d         // get declaring class
-    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15  // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
-    andl %eax, %r14d
-    movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
-    movl %r14d, %r15d
-    shrq LITERAL(32), %r14
-    cmpl %r14d, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %r15d, %eax
-    CFI_REMEMBER_STATE
-    POP r14
-    POP r15
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(rsp, 24)                        // workaround for clang bug: 31975598
-
-// Slow path, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+    SETUP_SAVE_EVERYTHING_FRAME
     // Outgoing argument set up
     movl %eax, %edi                             // pass string index
     movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
@@ -1480,19 +1477,21 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-DEFINE_FUNCTION art_quick_check_cast
+DEFINE_FUNCTION art_quick_check_instance_of
+    // We could check the super classes here but that is usually already checked in the caller.
     PUSH rdi                          // Save args for exc
     PUSH rsi
     subq LITERAL(8), %rsp             // Alignment padding.
     CFI_ADJUST_CFA_OFFSET(8)
     SETUP_FP_CALLEE_SAVE_FRAME
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
     testq %rax, %rax
     jz 1f                             // jump forward if not assignable
     RESTORE_FP_CALLEE_SAVE_FRAME
     addq LITERAL(24), %rsp            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-24)
 
+.Lreturn:
     ret
 
     CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
@@ -1504,9 +1503,9 @@
     POP rdi
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
+    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
     UNREACHABLE
-END_FUNCTION art_quick_check_cast
+END_FUNCTION art_quick_check_instance_of
 
 
 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
@@ -2140,15 +2139,14 @@
     leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
 #if (STRING_COMPRESSION_FEATURE)
     /* Differ cases */
-    cmpl LITERAL(0), %r8d
-    jl      .Lstring_compareto_this_is_compressed
-    cmpl    LITERAL(0), %r9d
-    jl      .Lstring_compareto_that_is_compressed
+    shrl    LITERAL(1), %r8d
+    jnc     .Lstring_compareto_this_is_compressed
+    shrl    LITERAL(1), %r9d
+    jnc     .Lstring_compareto_that_is_compressed
     jmp     .Lstring_compareto_both_not_compressed
 .Lstring_compareto_this_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %r8d
-    cmpl    LITERAL(0), %r9d
-    jl      .Lstring_compareto_both_compressed
+    shrl    LITERAL(1), %r9d
+    jnc     .Lstring_compareto_both_compressed
     /* Comparison this (8-bit) and that (16-bit) */
     mov     %r8d, %eax
     subl    %r9d, %eax
@@ -2167,7 +2165,6 @@
 .Lstring_compareto_keep_length1:
     ret
 .Lstring_compareto_that_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %r9d
     movl    %r8d, %eax
     subl    %r9d, %eax
     mov     %r8d, %ecx
@@ -2185,7 +2182,6 @@
 .Lstring_compareto_keep_length2:
     ret
 .Lstring_compareto_both_compressed:
-    andl    LITERAL(0x7FFFFFFF), %r9d
     /* Calculate min length and count diff */
     movl    %r8d, %ecx
     movl    %r8d, %eax
@@ -2224,16 +2220,16 @@
 
 UNIMPLEMENTED art_quick_memcmp16
 
-DEFINE_FUNCTION art_quick_assignable_from_code
+DEFINE_FUNCTION art_quick_instance_of
     SETUP_FP_CALLEE_SAVE_FRAME
     subq LITERAL(8), %rsp                      // Alignment padding.
     CFI_ADJUST_CFA_OFFSET(8)
-    call SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
+    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
     addq LITERAL(8), %rsp
     CFI_ADJUST_CFA_OFFSET(-8)
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
-END_FUNCTION art_quick_assignable_from_code
+END_FUNCTION art_quick_instance_of
 
 
 // Return from a nested signal:
@@ -2274,8 +2270,16 @@
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH rax
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if the addl caused an unsigned overflow of eax. The only case where it overflows is
+    // the forwarding address case.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name
+
+    // Save all potentially live caller-save core registers.
+    movq 0(%rsp), %rax
     PUSH rcx
     PUSH rdx
     PUSH rsi
@@ -2340,6 +2344,12 @@
     POP_REG_NE rax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    movq %rax, REG_VAR(reg)
+    POP_REG_NE rax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
 
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index b46b058..a4a6e5a 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -48,13 +48,13 @@
   return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
 }
 
-ObjPtr<mirror::Class> ArtField::ResolveGetType(uint32_t type_idx) {
+ObjPtr<mirror::Class> ArtField::ResolveGetType(dex::TypeIndex type_idx) {
   return Runtime::Current()->GetClassLinker()->ResolveType(type_idx, this);
 }
 
 ObjPtr<mirror::String> ArtField::ResolveGetStringName(Thread* self,
                                                       const DexFile& dex_file,
-                                                      uint32_t string_idx,
+                                                      dex::StringIndex string_idx,
                                                       ObjPtr<mirror::DexCache> dex_cache) {
   StackHandleScope<1> hs(self);
   return Runtime::Current()->GetClassLinker()->ResolveString(dex_file,
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 7c2f490..427e103 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -19,6 +19,7 @@
 
 #include <jni.h>
 
+#include "dex_file_types.h"
 #include "gc_root.h"
 #include "modifiers.h"
 #include "obj_ptr.h"
@@ -216,10 +217,11 @@
  private:
   ObjPtr<mirror::Class> ProxyFindSystemClass(const char* descriptor)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  ObjPtr<mirror::Class> ResolveGetType(uint32_t type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<mirror::Class> ResolveGetType(dex::TypeIndex type_idx)
+      REQUIRES_SHARED(Locks::mutator_lock_);
   ObjPtr<mirror::String> ResolveGetStringName(Thread* self,
                                               const DexFile& dex_file,
-                                              uint32_t string_idx,
+                                              dex::StringIndex string_idx,
                                               ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index a652178..ef03bb3 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -105,7 +105,7 @@
       DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
-  return access_flags_;
+  return access_flags_.load(std::memory_order_relaxed);
 }
 
 inline uint16_t ArtMethod::GetMethodIndex() {
@@ -183,17 +183,17 @@
 }
 
 template <bool kWithCheck>
-inline mirror::Class* ArtMethod::GetDexCacheResolvedType(uint32_t type_index,
+inline mirror::Class* ArtMethod::GetDexCacheResolvedType(dex::TypeIndex type_index,
                                                          PointerSize pointer_size) {
   if (kWithCheck) {
     mirror::DexCache* dex_cache =
         GetInterfaceMethodIfProxy(pointer_size)->GetDeclaringClass()->GetDexCache();
-    if (UNLIKELY(type_index >= dex_cache->NumResolvedTypes())) {
-      ThrowArrayIndexOutOfBoundsException(type_index, dex_cache->NumResolvedTypes());
+    if (UNLIKELY(type_index.index_ >= dex_cache->NumResolvedTypes())) {
+      ThrowArrayIndexOutOfBoundsException(type_index.index_, dex_cache->NumResolvedTypes());
       return nullptr;
     }
   }
-  mirror::Class* klass = GetDexCacheResolvedTypes(pointer_size)[type_index].Read();
+  mirror::Class* klass = GetDexCacheResolvedTypes(pointer_size)[type_index.index_].Read();
   return (klass != nullptr && !klass->IsErroneous()) ? klass : nullptr;
 }
 
@@ -210,7 +210,7 @@
   return GetDexCacheResolvedTypes(pointer_size) == other->GetDexCacheResolvedTypes(pointer_size);
 }
 
-inline mirror::Class* ArtMethod::GetClassFromTypeIndex(uint16_t type_idx,
+inline mirror::Class* ArtMethod::GetClassFromTypeIndex(dex::TypeIndex type_idx,
                                                        bool resolve,
                                                        PointerSize pointer_size) {
   mirror::Class* type = GetDexCacheResolvedType(type_idx, pointer_size);
@@ -336,7 +336,7 @@
   return GetDeclaringClass()->GetDexFile().GetCodeItem(GetCodeItemOffset());
 }
 
-inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx, PointerSize pointer_size) {
+inline bool ArtMethod::IsResolvedTypeIdx(dex::TypeIndex type_idx, PointerSize pointer_size) {
   DCHECK(!IsProxyMethod());
   return GetDexCacheResolvedType(type_idx, pointer_size) != nullptr;
 }
@@ -383,11 +383,10 @@
   const DexFile* dex_file = GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(GetDexMethodIndex());
   const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
-  uint16_t return_type_idx = proto_id.return_type_idx_;
-  return dex_file->GetTypeDescriptor(dex_file->GetTypeId(return_type_idx));
+  return dex_file->GetTypeDescriptor(dex_file->GetTypeId(proto_id.return_type_idx_));
 }
 
-inline const char* ArtMethod::GetTypeDescriptorFromTypeIdx(uint16_t type_idx) {
+inline const char* ArtMethod::GetTypeDescriptorFromTypeIdx(dex::TypeIndex type_idx) {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
   return dex_file->GetTypeDescriptor(dex_file->GetTypeId(type_idx));
@@ -400,7 +399,11 @@
 
 inline mirror::DexCache* ArtMethod::GetDexCache() {
   DCHECK(!IsProxyMethod());
-  return GetDeclaringClass()->GetDexCache();
+  if (UNLIKELY(IsObsolete())) {
+    return GetObsoleteDexCache();
+  } else {
+    return GetDeclaringClass()->GetDexCache();
+  }
 }
 
 template<ReadBarrierOption kReadBarrierOption>
@@ -440,7 +443,7 @@
   const DexFile* dex_file = GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(GetDexMethodIndex());
   const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
-  uint16_t return_type_idx = proto_id.return_type_idx_;
+  dex::TypeIndex return_type_idx = proto_id.return_type_idx_;
   mirror::Class* type = GetDexCacheResolvedType(return_type_idx, pointer_size);
   if (type == nullptr && resolve) {
     type = Runtime::Current()->GetClassLinker()->ResolveType(return_type_idx, this);
@@ -449,6 +452,53 @@
   return type;
 }
 
+inline bool ArtMethod::HasSingleImplementation() {
+  if (IsFinal() || GetDeclaringClass()->IsFinal()) {
+    // We don't set kAccSingleImplementation in these cases because intrinsics
+    // can also use that flag bit.
+    return true;
+  }
+  return (GetAccessFlags() & kAccSingleImplementation) != 0;
+}
+
+inline void ArtMethod::SetIntrinsic(uint32_t intrinsic) {
+  DCHECK(IsUint<8>(intrinsic));
+  // Currently we only do intrinsics for static/final methods or methods of final
+  // classes. We don't set kAccSingleImplementation for those methods.
+  DCHECK(IsStatic() || IsFinal() || GetDeclaringClass()->IsFinal()) <<
+      "Potential conflict with kAccSingleImplementation";
+  uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
+      kAccIntrinsic |
+      (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
+  if (kIsDebugBuild) {
+    uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
+    bool is_constructor = IsConstructor();
+    bool is_synchronized = IsSynchronized();
+    bool skip_access_checks = SkipAccessChecks();
+    bool is_fast_native = IsFastNative();
+    bool is_copied = IsCopied();
+    bool is_miranda = IsMiranda();
+    bool is_default = IsDefault();
+    bool is_default_conflict = IsDefaultConflicting();
+    bool is_compilable = IsCompilable();
+    bool must_count_locks = MustCountLocks();
+    SetAccessFlags(new_value);
+    DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
+    DCHECK_EQ(is_constructor, IsConstructor());
+    DCHECK_EQ(is_synchronized, IsSynchronized());
+    DCHECK_EQ(skip_access_checks, SkipAccessChecks());
+    DCHECK_EQ(is_fast_native, IsFastNative());
+    DCHECK_EQ(is_copied, IsCopied());
+    DCHECK_EQ(is_miranda, IsMiranda());
+    DCHECK_EQ(is_default, IsDefault());
+    DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
+    DCHECK_EQ(is_compilable, IsCompilable());
+    DCHECK_EQ(must_count_locks, MustCountLocks());
+  } else {
+    SetAccessFlags(new_value);
+  }
+}
+
 template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
 void ArtMethod::VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) {
   if (LIKELY(!declaring_class_.IsNull())) {
@@ -467,20 +517,6 @@
                     klass, this));
       interface_method->VisitRoots(visitor, pointer_size);
     }
-    // We know we don't have profiling information if the class hasn't been verified. Note
-    // that this check also ensures the IsNative call can be made, as IsNative expects a fully
-    // created class (and not a retired one).
-    if (klass->IsVerified()) {
-      // Runtime methods and native methods use the same field as the profiling info for
-      // storing their own data (jni entrypoint for native methods, and ImtConflictTable for
-      // some runtime methods).
-      if (!IsNative<kReadBarrierOption>() && !IsRuntimeMethod()) {
-        ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
-        if (profiling_info != nullptr) {
-          profiling_info->VisitRoots(visitor);
-        }
-      }
-    }
   }
 }
 
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index c550a1b..77d799f 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -35,6 +35,7 @@
 #include "jit/profiling_info.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/executable.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
@@ -50,6 +51,17 @@
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 
+ArtMethod* ArtMethod::GetSingleImplementation() {
+  DCHECK(!IsNative());
+  if (!IsAbstract()) {
+    // A non-abstract method's single implementation is itself.
+    return this;
+  }
+  // TODO: add single-implementation logic for abstract method by storing it
+  // in ptr_sized_fields_.
+  return nullptr;
+}
+
 ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                           jobject jlr_method) {
   ObjPtr<mirror::Executable> executable = soa.Decode<mirror::Executable>(jlr_method);
@@ -57,6 +69,28 @@
   return executable->GetArtMethod();
 }
 
+mirror::DexCache* ArtMethod::GetObsoleteDexCache() {
+  DCHECK(!Runtime::Current()->IsAotCompiler()) << PrettyMethod();
+  DCHECK(IsObsolete());
+  ObjPtr<mirror::ClassExt> ext(GetDeclaringClass()->GetExtData());
+  CHECK(!ext.IsNull());
+  ObjPtr<mirror::PointerArray> obsolete_methods(ext->GetObsoleteMethods());
+  CHECK(!obsolete_methods.IsNull());
+  DCHECK(ext->GetObsoleteDexCaches() != nullptr);
+  int32_t len = obsolete_methods->GetLength();
+  DCHECK_EQ(len, ext->GetObsoleteDexCaches()->GetLength());
+  // TODO: Using kRuntimePointerSize should be fine; images should never have obsolete methods.
+  PointerSize pointer_size = kRuntimePointerSize;
+  DCHECK_EQ(kRuntimePointerSize, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  for (int32_t i = 0; i < len; i++) {
+    if (this == obsolete_methods->GetElementPtrSize<ArtMethod*>(i, pointer_size)) {
+      return ext->GetObsoleteDexCaches()->Get(i);
+    }
+  }
+  LOG(FATAL) << "This method does not appear in the obsolete map of its class!";
+  UNREACHABLE();
+}
+
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
   CHECK(!IsProxyMethod());
   StackHandleScope<1> hs(self);
@@ -199,9 +233,9 @@
   // Iterate over the catch handlers associated with dex_pc.
   PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   for (CatchHandlerIterator it(*code_item, dex_pc); it.HasNext(); it.Next()) {
-    uint16_t iter_type_idx = it.GetHandlerTypeIndex();
+    dex::TypeIndex iter_type_idx = it.GetHandlerTypeIndex();
     // Catch all case
-    if (iter_type_idx == DexFile::kDexNoIndex16) {
+    if (!iter_type_idx.IsValid()) {
       found_dex_pc = it.GetHandlerAddress();
       break;
     }
@@ -322,7 +356,7 @@
   CHECK(!IsFastNative()) << PrettyMethod();
   CHECK(native_method != nullptr) << PrettyMethod();
   if (is_fast) {
-    SetAccessFlags(GetAccessFlags() | kAccFastNative);
+    AddAccessFlags(kAccFastNative);
   }
   SetEntryPointFromJni(native_method);
 }
@@ -476,7 +510,11 @@
     }
     // The table is in the .vdex file.
     const OatFile::OatDexFile* oat_dex_file = GetDexCache()->GetDexFile()->GetOatDexFile();
-    return oat_dex_file->GetOatFile()->DexBegin() + header->vmap_table_offset_;
+    const OatFile* oat_file = oat_dex_file->GetOatFile();
+    if (oat_file == nullptr) {
+      return nullptr;
+    }
+    return oat_file->DexBegin() + header->GetVmapTableOffset();
   } else {
     return oat_method.GetVmapTable();
   }
@@ -574,7 +612,7 @@
   DCHECK(method_header->Contains(pc))
       << PrettyMethod()
       << " " << std::hex << pc << " " << oat_entry_point
-      << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
+      << " " << (uintptr_t)(method_header->GetCode() + method_header->GetCodeSize());
   return method_header;
 }
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index b31999f..3bc6f5d 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -85,9 +85,29 @@
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
+  // This version should only be called when it's certain there is no
+  // concurrency so there is no need to guarantee atomicity. For example,
+  // before the method is linked.
   void SetAccessFlags(uint32_t new_access_flags) {
-    // Not called within a transaction.
-    access_flags_ = new_access_flags;
+    access_flags_.store(new_access_flags, std::memory_order_relaxed);
+  }
+
+  // This setter guarantees atomicity.
+  void AddAccessFlags(uint32_t flag) {
+    uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+    uint32_t new_access_flags;
+    do {
+      new_access_flags = old_access_flags | flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
+  }
+
+  // This setter guarantees atomicity.
+  void ClearAccessFlags(uint32_t flag) {
+    uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+    uint32_t new_access_flags;
+    do {
+      new_access_flags = old_access_flags & ~flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
   }
 
   // Approximate what kind of method call would be used for this method.
@@ -142,39 +162,7 @@
     return (GetAccessFlags() & kAccIntrinsic) != 0;
   }
 
-  void SetIntrinsic(uint32_t intrinsic) {
-    DCHECK(IsUint<8>(intrinsic));
-    uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
-        kAccIntrinsic |
-        (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
-    if (kIsDebugBuild) {
-      uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
-      bool is_constructor = IsConstructor();
-      bool is_synchronized = IsSynchronized();
-      bool skip_access_checks = SkipAccessChecks();
-      bool is_fast_native = IsFastNative();
-      bool is_copied = IsCopied();
-      bool is_miranda = IsMiranda();
-      bool is_default = IsDefault();
-      bool is_default_conflict = IsDefaultConflicting();
-      bool is_compilable = IsCompilable();
-      bool must_count_locks = MustCountLocks();
-      SetAccessFlags(new_value);
-      DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
-      DCHECK_EQ(is_constructor, IsConstructor());
-      DCHECK_EQ(is_synchronized, IsSynchronized());
-      DCHECK_EQ(skip_access_checks, SkipAccessChecks());
-      DCHECK_EQ(is_fast_native, IsFastNative());
-      DCHECK_EQ(is_copied, IsCopied());
-      DCHECK_EQ(is_miranda, IsMiranda());
-      DCHECK_EQ(is_default, IsDefault());
-      DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
-      DCHECK_EQ(is_compilable, IsCompilable());
-      DCHECK_EQ(must_count_locks, MustCountLocks());
-    } else {
-      SetAccessFlags(new_value);
-    }
-  }
+  ALWAYS_INLINE void SetIntrinsic(uint32_t intrinsic) REQUIRES_SHARED(Locks::mutator_lock_);
 
   uint32_t GetIntrinsic() {
     DCHECK(IsIntrinsic());
@@ -227,6 +215,11 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  bool IsObsolete() {
+    // TODO: Consider making this IsIntrinsic check unnecessary.
+    return !IsIntrinsic() && (GetAccessFlags() & kAccObsoleteMethod) != 0;
+  }
+
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
     return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
@@ -245,6 +238,10 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
+  bool IsVarargs() {
+    return (GetAccessFlags() & kAccVarargs) != 0;
+  }
+
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsProxyMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -254,7 +251,7 @@
 
   void SetSkipAccessChecks() {
     DCHECK(!SkipAccessChecks());
-    SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
+    AddAccessFlags(kAccSkipAccessChecks);
   }
 
   // Should this method be run in the interpreter and count locks (e.g., failed structured-
@@ -343,7 +340,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   template <bool kWithCheck = true>
-  mirror::Class* GetDexCacheResolvedType(uint32_t type_idx, PointerSize pointer_size)
+  mirror::Class* GetDexCacheResolvedType(dex::TypeIndex type_idx, PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void SetDexCacheResolvedTypes(GcRoot<mirror::Class>* new_dex_cache_types,
                                 PointerSize pointer_size)
@@ -355,7 +352,9 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Get the Class* from the type index into this method's dex cache.
-  mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, PointerSize pointer_size)
+  mirror::Class* GetClassFromTypeIndex(dex::TypeIndex type_idx,
+                                       bool resolve,
+                                       PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns true if this method has the same name and signature of the other method.
@@ -454,6 +453,26 @@
     return DataOffset(kRuntimePointerSize);
   }
 
+  ALWAYS_INLINE bool HasSingleImplementation() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  ALWAYS_INLINE void SetHasSingleImplementation(bool single_impl) {
+    DCHECK(!IsIntrinsic()) << "conflict with intrinsic bits";
+    if (single_impl) {
+      AddAccessFlags(kAccSingleImplementation);
+    } else {
+      ClearAccessFlags(kAccSingleImplementation);
+    }
+  }
+
+  ArtMethod* GetSingleImplementation()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  ALWAYS_INLINE void SetSingleImplementation(ArtMethod* method, PointerSize pointer_size) {
+    DCHECK(!IsNative());
+    DCHECK(IsAbstract());  // Non-abstract method's single implementation is just itself.
+    SetDataPtrSize(method, pointer_size);
+  }
+
   void* GetEntryPointFromJni() {
     DCHECK(IsNative());
     return GetEntryPointFromJniPtrSize(kRuntimePointerSize);
@@ -527,7 +546,7 @@
 
   const DexFile::CodeItem* GetCodeItem() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  bool IsResolvedTypeIdx(uint16_t type_idx, PointerSize pointer_size)
+  bool IsResolvedTypeIdx(dex::TypeIndex type_idx, PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   int32_t GetLineNumFromDexPC(uint32_t dex_pc) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -544,7 +563,7 @@
 
   const char* GetReturnTypeDescriptor() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  const char* GetTypeDescriptorFromTypeIdx(uint16_t type_idx)
+  const char* GetTypeDescriptorFromTypeIdx(dex::TypeIndex type_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // May cause thread suspension due to GetClassFromTypeIdx calling ResolveType this caused a large
@@ -555,6 +574,7 @@
   mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* GetDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::DexCache* GetObsoleteDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -647,7 +667,10 @@
   GcRoot<mirror::Class> declaring_class_;
 
   // Access flags; low 16 bits are defined by spec.
-  uint32_t access_flags_;
+  // Getting and setting these flags needs to be atomic when concurrency is
+  // possible, e.g. after this method's class is linked, such as when setting
+  // verifier flags or the single-implementation flag.
+  std::atomic<std::uint32_t> access_flags_;
 
   /* Dex file fields. The defining dex file is available via declaring_class_->dex_cache_ */
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index cd8815b..5ef1f06 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -172,6 +172,9 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
+#define MIRROR_CLASS_IF_TABLE_OFFSET (16 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_IF_TABLE_OFFSET,
+            art::mirror::Class::IfTableOffset().Int32Value())
 #define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 62cd2a7..2feb28a 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -94,6 +94,7 @@
   kArenaAllocGraphChecker,
   kArenaAllocVerifier,
   kArenaAllocCallingConvention,
+  kArenaAllocCHA,
   kNumArenaAllocKinds
 };
 
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 6b21a56..1dca428 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -80,7 +80,7 @@
     gCmdLine.reset(new std::string("<unset>"));
   }
 
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
 #define INIT_LOGGING_DEFAULT_LOGGER android::base::LogdLogger()
 #else
 #define INIT_LOGGING_DEFAULT_LOGGER android::base::StderrLogger
diff --git a/runtime/base/memory_tool.h b/runtime/base/memory_tool.h
index e1a2e07..42cbaa0 100644
--- a/runtime/base/memory_tool.h
+++ b/runtime/base/memory_tool.h
@@ -40,7 +40,10 @@
 constexpr bool kMemoryToolIsAvailable = false;
 #endif
 
+extern "C" void __asan_handle_no_return();
+
 #define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
+#define MEMORY_TOOL_HANDLE_NO_RETURN __asan_handle_no_return()
 #define RUNNING_ON_MEMORY_TOOL 1U
 constexpr bool kMemoryToolIsValgrind = false;
 constexpr bool kMemoryToolDetectsLeaks = true;
@@ -55,6 +58,7 @@
 #define MEMORY_TOOL_MAKE_UNDEFINED(p, s) VALGRIND_MAKE_MEM_UNDEFINED(p, s)
 #define MEMORY_TOOL_MAKE_DEFINED(p, s) VALGRIND_MAKE_MEM_DEFINED(p, s)
 #define ATTRIBUTE_NO_SANITIZE_ADDRESS
+#define MEMORY_TOOL_HANDLE_NO_RETURN do { } while (0)
 #define RUNNING_ON_MEMORY_TOOL RUNNING_ON_VALGRIND
 constexpr bool kMemoryToolIsAvailable = true;
 constexpr bool kMemoryToolIsValgrind = true;
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index bde0327..6665f95 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -42,14 +42,13 @@
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::instrument_entrypoints_lock_ = nullptr;
 Mutex* Locks::intern_table_lock_ = nullptr;
-Mutex* Locks::interpreter_string_init_map_lock_ = nullptr;
 Mutex* Locks::jni_libraries_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
 Mutex* Locks::mem_maps_lock_ = nullptr;
 Mutex* Locks::modify_ldt_lock_ = nullptr;
 MutatorMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
-Mutex* Locks::verifier_deps_lock_ = nullptr;
+ReaderWriterMutex* Locks::verifier_deps_lock_ = nullptr;
 ReaderWriterMutex* Locks::oat_file_manager_lock_ = nullptr;
 Mutex* Locks::host_dlopen_handles_lock_ = nullptr;
 Mutex* Locks::reference_processor_lock_ = nullptr;
@@ -59,6 +58,7 @@
 Mutex* Locks::reference_queue_soft_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_weak_references_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::cha_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
 ConditionVariable* Locks::thread_exit_cond_ = nullptr;
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
@@ -673,7 +673,7 @@
   ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
   ++num_pending_readers_;
   if (futex(state_.Address(), FUTEX_WAIT, cur_state, nullptr, nullptr, 0) != 0) {
-    if (errno != EAGAIN) {
+    if (errno != EAGAIN && errno != EINTR) {
       PLOG(FATAL) << "futex wait failed for " << name_;
     }
   }
@@ -797,7 +797,7 @@
                    reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()),
                    guard_.state_.Address(), cur_sequence) != -1;
       if (!done) {
-        if (errno != EAGAIN) {
+        if (errno != EAGAIN && errno != EINTR) {
           PLOG(FATAL) << "futex cmp requeue failed for " << name_;
         }
       }
@@ -956,6 +956,7 @@
     DCHECK(logging_lock_ != nullptr);
     DCHECK(mutator_lock_ != nullptr);
     DCHECK(profiler_lock_ != nullptr);
+    DCHECK(cha_lock_ != nullptr);
     DCHECK(thread_list_lock_ != nullptr);
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
@@ -1015,6 +1016,10 @@
     DCHECK(breakpoint_lock_ == nullptr);
     breakpoint_lock_ = new ReaderWriterMutex("breakpoint lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kCHALock);
+    DCHECK(cha_lock_ == nullptr);
+    cha_lock_ = new Mutex("CHA lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
@@ -1040,7 +1045,7 @@
 
     UPDATE_CURRENT_LOCK_LEVEL(kVerifierDepsLock);
     DCHECK(verifier_deps_lock_ == nullptr);
-    verifier_deps_lock_ = new Mutex("verifier deps lock", current_lock_level);
+    verifier_deps_lock_ = new ReaderWriterMutex("verifier deps lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kHostDlOpenHandlesLock);
     DCHECK(host_dlopen_handles_lock_ == nullptr);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3f2c5a9..21b5bb9 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -79,7 +79,6 @@
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
-  kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
   kHostDlOpenHandlesLock,
@@ -92,14 +91,13 @@
   kDefaultMutexLevel,
   kDexLock,
   kMarkSweepLargeObjectLock,
-  kPinTableLock,
   kJdwpObjectRegistryLock,
   kModifyLdtLock,
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
-  kMethodVerifiersLock,
   kClassLinkerClassesLock,  // TODO rename.
   kJitCodeCacheLock,
+  kCHALock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
@@ -630,12 +628,12 @@
   // TODO: improve name, perhaps instrumentation_update_lock_.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
 
-  // Guards String initializer register map in interpreter.
-  static Mutex* interpreter_string_init_map_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+  // Guards Class Hierarchy Analysis (CHA).
+  static Mutex* cha_lock_ ACQUIRED_AFTER(deoptimization_lock_);
 
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(interpreter_string_init_map_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(cha_lock_);
 
   // Signaled when threads terminate. Used to determine when all non-daemons have terminated.
   static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_);
@@ -664,8 +662,8 @@
   // Guards opened oat files in OatFileManager.
   static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
-  // Guards verifier dependency collection in VerifierDeps.
-  static Mutex* verifier_deps_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
+  // Guards extra string entries for VerifierDeps.
+  static ReaderWriterMutex* verifier_deps_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
 
   // Guards dlopen_handles_ in DlOpenOatFile.
   static Mutex* host_dlopen_handles_lock_ ACQUIRED_AFTER(verifier_deps_lock_);
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index a53dcea..d5f375a 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_BASE_STL_UTIL_H_
 
 #include <algorithm>
+#include <set>
 #include <sstream>
 
 #include "base/logging.h"
@@ -187,6 +188,12 @@
   using type = T;
 };
 
+// Insert every element of `other` into `to_update`; `other` itself is left unmodified.
+template <typename T>
+static inline void MergeSets(std::set<T>& to_update, const std::set<T>& other) {
+  to_update.insert(other.begin(), other.end());
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 4498198..ff2dd1b 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -339,22 +339,59 @@
   return true;
 }
 
-void FdFile::Erase() {
+// Unlinks file_path_ only if it still names the inode open as fd_; returns whether it matched.
+bool FdFile::Unlink() {
+  if (file_path_.empty()) {
+    return false;
+  }
+
+  // Try to figure out whether this file is still referring to the one on disk.
+  bool is_current = false;
+  struct stat this_stat, current_stat;
+  int cur_fd = TEMP_FAILURE_RETRY(open(file_path_.c_str(), O_RDONLY));
+  // open() signals failure with -1 only; descriptor 0 is valid (e.g. if stdin was closed).
+  if (cur_fd >= 0) {
+    // File still exists.
+    if (fstat(fd_, &this_stat) == 0 && fstat(cur_fd, &current_stat) == 0) {
+      is_current = (this_stat.st_dev == current_stat.st_dev) &&
+                   (this_stat.st_ino == current_stat.st_ino);
+    }
+    close(cur_fd);
+  }
+
+  if (is_current) {
+    unlink(file_path_.c_str());
+  }
+
+  return is_current;
+}
+
+bool FdFile::Erase(bool unlink) {
   DCHECK(!read_only_mode_);
-  TEMP_FAILURE_RETRY(SetLength(0));
-  TEMP_FAILURE_RETRY(Flush());
-  TEMP_FAILURE_RETRY(Close());
+
+  bool ret_result = true;
+  if (unlink) {
+    ret_result = Unlink();
+  }
+
+  int result;
+  result = SetLength(0);
+  result = Flush();
+  result = Close();
+  // Ignore the errors.
+
+  return ret_result;
 }
 
 int FdFile::FlushCloseOrErase() {
   DCHECK(!read_only_mode_);
-  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  int flush_result = Flush();
   if (flush_result != 0) {
     LOG(ERROR) << "CloseOrErase failed while flushing a file.";
     Erase();
     return flush_result;
   }
-  int close_result = TEMP_FAILURE_RETRY(Close());
+  int close_result = Close();
   if (close_result != 0) {
     LOG(ERROR) << "CloseOrErase failed while closing a file.";
     Erase();
@@ -365,11 +402,11 @@
 
 int FdFile::FlushClose() {
   DCHECK(!read_only_mode_);
-  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  int flush_result = Flush();
   if (flush_result != 0) {
     LOG(ERROR) << "FlushClose failed while flushing a file.";
   }
-  int close_result = TEMP_FAILURE_RETRY(Close());
+  int close_result = Close();
   if (close_result != 0) {
     LOG(ERROR) << "FlushClose failed while closing a file.";
   }
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index d896ee9..eb85c4f 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -97,7 +97,14 @@
   int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
-  void Erase();
+  // If the file was opened with a path name and unlink = true, also calls Unlink() on the path.
+  // Note that it is the caller's responsibility to avoid races.
+  bool Erase(bool unlink = false);
+
+  // Call unlink() if the file was opened with a path, and if open() with the name shows that
+  // the file descriptor of this file is still up-to-date. This is still racy, though, and it
+  // is up to the caller to ensure correctness in a multi-process setup.
+  bool Unlink();
 
   // Try to Flush(), then try to Close(); If either fails, call Erase().
   int FlushCloseOrErase() WARN_UNUSED;
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 99ef6f7..7657a38 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -186,4 +186,24 @@
   ASSERT_EQ(file2.Close(), 0);
 }
 
+TEST_F(FdFileTest, EraseWithPathUnlinks) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  std::string filename = tmp.GetFilename();
+  tmp.Close();  // This is required because of the unlink race between the scratch file and the
+                // FdFile, which leads to close-guard breakage.
+  FdFile file(filename, O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_GE(file.Fd(), 0);
+  uint8_t buffer[16] = { 0 };
+  EXPECT_TRUE(file.WriteFully(&buffer, sizeof(buffer)));
+  EXPECT_EQ(file.Flush(), 0);
+
+  EXPECT_TRUE(file.Erase(true));  // unlink = true: the path is expected to be removed too.
+
+  EXPECT_FALSE(file.IsOpened());  // Erase() also closes the file.
+
+  EXPECT_FALSE(art::OS::FileExists(filename.c_str())) << filename;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/variant_map_test.cc b/runtime/base/variant_map_test.cc
index ccb22eb..93336e0 100644
--- a/runtime/base/variant_map_test.cc
+++ b/runtime/base/variant_map_test.cc
@@ -107,8 +107,8 @@
   fmFilled.Set(FruitMap::Orange, 555.0);
   EXPECT_EQ(size_t(2), fmFilled.Size());
 
-  // Test copy constructor
-  FruitMap fmEmptyCopy(fmEmpty);
+  // Test copy constructor (NOLINT as a reference is suggested, instead)
+  FruitMap fmEmptyCopy(fmEmpty);  // NOLINT
   EXPECT_EQ(size_t(0), fmEmptyCopy.Size());
 
   // Test copy constructor
diff --git a/runtime/cha.cc b/runtime/cha.cc
new file mode 100644
index 0000000..be675a8
--- /dev/null
+++ b/runtime/cha.cc
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "thread_pool.h"
+
+namespace art {
+
+void ClassHierarchyAnalysis::AddDependency(ArtMethod* method,
+                                           ArtMethod* dependent_method,
+                                           OatQuickMethodHeader* dependent_header) {
+  // Find the list for `method`, creating an empty slot for it on first use.
+  auto insertion = cha_dependency_map_.emplace(method, nullptr);
+  ListOfDependentPairs*& dependents = insertion.first->second;
+  if (insertion.second) {
+    dependents = new ListOfDependentPairs();
+  } else {
+    DCHECK(dependents != nullptr);
+  }
+  dependents->push_back(std::make_pair(dependent_method, dependent_header));
+}
+
+std::vector<std::pair<ArtMethod*, OatQuickMethodHeader*>>*
+    ClassHierarchyAnalysis::GetDependents(ArtMethod* method) {
+  auto entry = cha_dependency_map_.find(method);
+  if (entry == cha_dependency_map_.end()) {
+    return nullptr;  // No dependents are recorded for `method`.
+  }
+  DCHECK(entry->second != nullptr);
+  return entry->second;
+}
+
+void ClassHierarchyAnalysis::RemoveDependencyFor(ArtMethod* method) {
+  auto entry = cha_dependency_map_.find(method);
+  if (entry == cha_dependency_map_.end()) {
+    return;  // Nothing is tracked for `method`.
+  }
+  delete entry->second;  // The map holds the list through a raw pointer.
+  cha_dependency_map_.erase(entry);
+}
+
+void ClassHierarchyAnalysis::RemoveDependentsWithMethodHeaders(
+    const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+  // Drop every dependent whose method header is in `method_headers`, then drop
+  // (and free) any map entry whose dependent list has become empty.
+  for (auto map_it = cha_dependency_map_.begin(); map_it != cha_dependency_map_.end(); ) {
+    auto dependents = map_it->second;
+    for (auto vec_it = dependents->begin(); vec_it != dependents->end(); ) {
+      OatQuickMethodHeader* method_header = vec_it->second;
+      // Use the set's hashed lookup rather than a linear std::find over its elements.
+      if (method_headers.find(method_header) != method_headers.end()) {
+        vec_it = dependents->erase(vec_it);
+      } else {
+        vec_it++;
+      }
+    }
+    // Remove the map entry if there are no more dependents.
+    if (dependents->empty()) {
+      map_it = cha_dependency_map_.erase(map_it);
+      delete dependents;
+    } else {
+      map_it++;
+    }
+  }
+}
+
+// This stack visitor walks the stack and for compiled code with certain method
+// headers, sets the should_deoptimize flag on stack to 1.
+// TODO: also set the register value to 1 when should_deoptimize is allocated in
+// a register.
+class CHAStackVisitor FINAL : public StackVisitor {
+ public:
+  CHAStackVisitor(Thread* thread_in,
+                  Context* context,
+                  const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+        method_headers_(method_headers) {
+  }
+
+  bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* method = GetMethod();
+    if (method == nullptr || method->IsRuntimeMethod() || method->IsNative()) {
+      return true;
+    }
+    if (GetCurrentQuickFrame() == nullptr) {
+      // Not compiled code.
+      return true;
+    }
+    // Method may have multiple versions of compiled code. Check
+    // the method header to see if it has should_deoptimize flag.
+    const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+    if (!method_header->HasShouldDeoptimizeFlag()) {
+      // This compiled version doesn't have should_deoptimize flag. Skip.
+      return true;
+    }
+    // Hashed lookup; the const_cast only adapts the key type and nothing is written through it.
+    if (method_headers_.count(const_cast<OatQuickMethodHeader*>(method_header)) == 0) {
+      // Not in the set of method headers that should be deoptimized.
+      return true;
+    }
+
+    // The compiled code on stack is not valid anymore. Need to deoptimize.
+    SetShouldDeoptimizeFlag();
+
+    return true;
+  }
+
+ private:
+  void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
+    QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+    size_t frame_size = frame_info.FrameSizeInBytes();
+    uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+    size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) *
+        GetBytesPerGprSpillLocation(kRuntimeISA);
+    size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) *
+        GetBytesPerFprSpillLocation(kRuntimeISA);
+    size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+    uint8_t* should_deoptimize_addr = sp + offset;
+    // Set deoptimization flag to 1.
+    DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1);
+    *should_deoptimize_addr = 1;
+  }
+
+  // Set of method headers for compiled code that should be deoptimized.
+  const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHAStackVisitor);
+};
+
+class CHACheckpoint FINAL : public Closure {
+ public:
+  explicit CHACheckpoint(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      : barrier_(0),
+        method_headers_(method_headers) {}
+
+  void Run(Thread* thread) OVERRIDE {
+    // Note thread and self may not be equal if thread was already suspended at
+    // the point of the request.
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    CHAStackVisitor visitor(thread, nullptr, method_headers_);
+    visitor.WalkStack();  // Flags frames of `thread` whose header is in method_headers_.
+    barrier_.Pass(self);
+  }
+
+  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
+    Thread* self = Thread::Current();
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    barrier_.Increment(self, threads_running_checkpoint);
+  }
+
+ private:
+  // The barrier to be passed through and for the requestor to wait upon.
+  Barrier barrier_;
+  // Set of method headers for invalidated compiled code; held by reference, not copied.
+  const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHACheckpoint);
+};
+
+void ClassHierarchyAnalysis::VerifyNonSingleImplementation(mirror::Class* verify_class,
+                                                           uint16_t verify_index) {
+  // Grab cha_lock_ to make sure all single-implementation updates are seen.
+  PointerSize image_pointer_size =
+      Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+  // Walk up the superclass chain, stopping at the first class without slot `verify_index`.
+  for (mirror::Class* k = verify_class; k != nullptr; k = k->GetSuperClass()) {
+    if (verify_index >= k->GetVTableLength()) {
+      return;
+    }
+    ArtMethod* slot_method = k->GetVTableEntry(verify_index, image_pointer_size);
+    DCHECK(!slot_method->HasSingleImplementation())
+        << "class: " << k->PrettyClass()
+        << " verify_method: " << slot_method->PrettyMethod(true);
+  }
+}
+
+void ClassHierarchyAnalysis::CheckSingleImplementationInfo(
+    Handle<mirror::Class> klass,
+    ArtMethod* virtual_method,
+    ArtMethod* method_in_super,
+    std::unordered_set<ArtMethod*>& invalidated_single_impl_methods) {
+  // TODO: if klass is not instantiable, virtual_method isn't invocable yet so
+  // even if it overrides, it doesn't invalidate single-implementation
+  // assumption.
+
+  DCHECK_NE(virtual_method, method_in_super);
+  DCHECK(method_in_super->GetDeclaringClass()->IsResolved()) << "class isn't resolved";
+  // Unless virtual_method was copied in (e.g. from a default interface method),
+  // klass itself must be its declaring class.
+  DCHECK(virtual_method->IsCopied() ||
+         virtual_method->GetDeclaringClass() == klass.Get());
+
+  // Overriding means method_in_super can no longer count as
+  // single-implementation (if it still does).
+  // cha_lock_ is not taken here: once the flag has been cleared it is never
+  // set back to true.
+  if (method_in_super->HasSingleImplementation()) {
+    // Native methods don't have single-implementation flag set.
+    DCHECK(!method_in_super->IsNative());
+    // Record method_in_super so that its single-implementation status gets invalidated.
+    invalidated_single_impl_methods.insert(method_in_super);
+    return;
+  }
+
+  // method_in_super already has multiple implementations. All methods in the
+  // same vtable slots in its super classes should have
+  // non-single-implementation already.
+  if (kIsDebugBuild) {
+    VerifyNonSingleImplementation(klass->GetSuperClass()->GetSuperClass(),
+                                  method_in_super->GetMethodIndex());
+  }
+}
+
+void ClassHierarchyAnalysis::InitSingleImplementationFlag(Handle<mirror::Class> klass,
+                                                          ArtMethod* method) {
+  DCHECK(method->IsCopied() || method->GetDeclaringClass() == klass.Get());
+  if (klass->IsFinal() || method->IsFinal()) {
+    // Final classes or methods do not need CHA for devirtualization.
+    // This frees up modifier bits for intrinsics which currently are only
+    // used for static methods or methods of final classes.
+    return;
+  }
+  if (method->IsNative()) {
+    // Native method's invocation overhead is already high and it
+    // cannot be inlined. It's not worthwhile to devirtualize the
+    // call which can add a deoptimization point.
+    DCHECK(!method->HasSingleImplementation());
+    return;
+  }
+  method->SetHasSingleImplementation(true);
+  if (method->IsAbstract()) {
+    // There is no real implementation yet.
+    // TODO: implement single-implementation logic for abstract methods.
+    DCHECK(method->GetSingleImplementation() == nullptr);
+    return;
+  }
+  // Single implementation of non-abstract method is itself.
+  DCHECK_EQ(method->GetSingleImplementation(), method);
+}
+
+void ClassHierarchyAnalysis::UpdateAfterLoadingOf(Handle<mirror::Class> klass) {
+  if (klass->IsInterface()) {
+    return;
+  }
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class == nullptr) {
+    return;
+  }
+
+  // Keeps track of all methods whose single-implementation assumption
+  // is invalidated by linking `klass`.
+  std::unordered_set<ArtMethod*> invalidated_single_impl_methods;
+
+  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  // Do an entry-by-entry comparison of vtable contents with super's vtable.
+  for (int32_t i = 0; i < super_class->GetVTableLength(); ++i) {
+    ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+    ArtMethod* method_in_super = super_class->GetVTableEntry(i, image_pointer_size);
+    if (method == method_in_super) {
+      // vtable slot entry is inherited from super class.
+      continue;
+    }
+    InitSingleImplementationFlag(klass, method);
+    CheckSingleImplementationInfo(klass,
+                                  method,
+                                  method_in_super,
+                                  invalidated_single_impl_methods);
+  }
+
+  // For new virtual methods that don't override.
+  for (int32_t i = super_class->GetVTableLength(); i < klass->GetVTableLength(); ++i) {
+    ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+    InitSingleImplementationFlag(klass, method);
+  }
+
+  Runtime* const runtime = Runtime::Current();
+  if (!invalidated_single_impl_methods.empty()) {
+    Thread *self = Thread::Current();
+    // Method headers for compiled code to be invalidated.
+    std::unordered_set<OatQuickMethodHeader*> dependent_method_headers;
+
+    {
+      // We do this under cha_lock_. Committing code also grabs this lock to
+      // make sure the code is only committed when all single-implementation
+      // assumptions are still true.
+      MutexLock cha_mu(self, *Locks::cha_lock_);
+      // Invalidate compiled methods that assume some virtual calls have only
+      // single implementations.
+      for (ArtMethod* invalidated : invalidated_single_impl_methods) {
+        if (!invalidated->HasSingleImplementation()) {
+          // It might have been invalidated already when other class linking is
+          // going on.
+          continue;
+        }
+        invalidated->SetHasSingleImplementation(false);
+
+        if (runtime->IsAotCompiler()) {
+          // No need to invalidate any compiled code as the AotCompiler doesn't
+          // run any code.
+          continue;
+        }
+
+        // Invalidate all dependents.
+        auto dependents = GetDependents(invalidated);
+        if (dependents == nullptr) {
+          continue;
+        }
+        for (const auto& dependent : *dependents) {
+          ArtMethod* method = dependent.first;
+          OatQuickMethodHeader* method_header = dependent.second;
+          VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
+          DCHECK(runtime->UseJitCompilation());
+          runtime->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+              method, method_header);
+          dependent_method_headers.insert(method_header);
+        }
+        RemoveDependencyFor(invalidated);
+      }
+    }
+
+    if (dependent_method_headers.empty()) {
+      return;
+    }
+    // Deoptimize compiled code on stack that should have been invalidated.
+    CHACheckpoint checkpoint(dependent_method_headers);
+    size_t threads_running_checkpoint = runtime->GetThreadList()->RunCheckpoint(&checkpoint);
+    if (threads_running_checkpoint != 0) {
+      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/runtime/cha.h b/runtime/cha.h
new file mode 100644
index 0000000..ada5c89
--- /dev/null
+++ b/runtime/cha.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CHA_H_
+#define ART_RUNTIME_CHA_H_
+
+#include "art_method.h"
+#include "base/enums.h"
+#include "base/mutex.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "oat_quick_method_header.h"
+#include <unordered_map>
+#include <unordered_set>
+
+namespace art {
+
+/**
+ * Class Hierarchy Analysis (CHA) tries to devirtualize virtual calls into
+ * direct calls based on the info generated by analyzing class hierarchies.
+ * If a class is not subclassed, or even if it's subclassed but one of its
+ * virtual methods isn't overridden, a virtual call for that method can be
+ * changed into a direct call.
+ *
+ * Each virtual method carries a single-implementation status. The status is
+ * incrementally maintained at the end of class linking time when method
+ * overriding takes effect.
+ *
+ * Compiler takes advantage of the single-implementation info of a
+ * method. If a method A has the single-implementation flag set, the compiler
+ * devirtualizes the virtual call for method A into a direct call, and
+ * further tries to inline the direct call as a result. The compiler will
+ * also register a dependency that the compiled code depends on the
+ * assumption that method A has single-implementation status.
+ *
+ * When single-implementation info is updated at the end of class linking,
+ * and if method A's single-implementation status is invalidated, all compiled
+ * code that depends on the assumption that method A has single-implementation
+ * status needs to be invalidated. Method entrypoints that have this dependency
+ * will be updated as a result. Method A can later be recompiled with less
+ * aggressive assumptions.
+ *
+ * For live compiled code that's on stack, deoptimization will be initiated
+ * to force the invalidated compiled code into interpreter mode to guarantee
+ * correctness. The deoptimization mechanism used is a hybrid of
+ * synchronous and asynchronous deoptimization. The synchronous deoptimization
+ * part checks a hidden local variable flag for the method, and if true,
+ * initiates deoptimization. The asynchronous deoptimization part issues a
+ * checkpoint that walks the stack and for any compiled code on the stack
+ * that should be deoptimized, sets the hidden local variable value to true.
+ *
+ * A cha_lock_ needs to be held for updating single-implementation status,
+ * and registering/unregistering CHA dependencies. Registering CHA dependency
+ * and making compiled code visible also need to be atomic. Otherwise, we
+ * may miss invalidating CHA dependents or making compiled code visible even
+ * after it is invalidated. Care needs to be taken between cha_lock_ and
+ * JitCodeCache::lock_ to guarantee the atomicity.
+ *
+ * We base our CHA on dynamically linked class profiles instead of doing static
+ * analysis. Static analysis can be too aggressive due to dynamic class loading
+ * at runtime, and too conservative since some classes may not be really loaded
+ * at runtime.
+ */
+class ClassHierarchyAnalysis {
+ public:
+  // Types for recording CHA dependencies.
+  // For invalidating CHA dependency, we need to know both the ArtMethod and
+  // the method header. If the ArtMethod has compiled code with the method header
+  // as the entrypoint, we update the entrypoint to the interpreter bridge.
+  // We will also deoptimize frames that are currently executing the code of
+  // the method header.
+  typedef std::pair<ArtMethod*, OatQuickMethodHeader*> MethodAndMethodHeaderPair;
+  typedef std::vector<MethodAndMethodHeaderPair> ListOfDependentPairs;
+
+  ClassHierarchyAnalysis() {}
+
+  // Add a dependency that compiled code with `dependent_header` for `dependent_method`
+  // assumes that virtual `method` has single-implementation.
+  void AddDependency(ArtMethod* method,
+                     ArtMethod* dependent_method,
+                     OatQuickMethodHeader* dependent_header) REQUIRES(Locks::cha_lock_);
+
+  // Return the dependents recorded for `method`, or nullptr if none are recorded.
+  std::vector<MethodAndMethodHeaderPair>* GetDependents(ArtMethod* method)
+      REQUIRES(Locks::cha_lock_);
+
+  // Remove dependency tracking for compiled code that assumes that
+  // `method` has single-implementation.
+  void RemoveDependencyFor(ArtMethod* method) REQUIRES(Locks::cha_lock_);
+
+  // Remove from cha_dependency_map_ all entries that contain OatQuickMethodHeader from
+  // the given `method_headers` set.
+  // This is used when some compiled code is freed.
+  void RemoveDependentsWithMethodHeaders(
+      const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      REQUIRES(Locks::cha_lock_);
+
+  // Update CHA info for methods that `klass` overrides, after loading `klass`.
+  void UpdateAfterLoadingOf(Handle<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  void InitSingleImplementationFlag(Handle<mirror::Class> klass, ArtMethod* method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // `virtual_method` in `klass` overrides `method_in_super`.
+  // This will invalidate some assumptions on single-implementation.
+  // Append methods that should have their single-implementation flag invalidated
+  // to `invalidated_single_impl_methods`.
+  void CheckSingleImplementationInfo(
+      Handle<mirror::Class> klass,
+      ArtMethod* virtual_method,
+      ArtMethod* method_in_super,
+      std::unordered_set<ArtMethod*>& invalidated_single_impl_methods)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Verify all methods in the same vtable slot from verify_class and its supers
+  // don't have single-implementation.
+  void VerifyNonSingleImplementation(mirror::Class* verify_class, uint16_t verify_index)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Maps a method to the list of (dependent method, method header) pairs whose compiled code
+  // assumes that method has a single implementation; used for CHA-based devirtualization.
+  std::unordered_map<ArtMethod*, ListOfDependentPairs*> cha_dependency_map_
+    GUARDED_BY(Locks::cha_lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(ClassHierarchyAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CHA_H_
diff --git a/runtime/cha_test.cc b/runtime/cha_test.cc
new file mode 100644
index 0000000..d2f335e
--- /dev/null
+++ b/runtime/cha_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class CHATest : public CommonRuntimeTest {};
+
+// Fake ArtMethod pointers: distinct constant addresses cast to ArtMethod*, not real methods.
+#define METHOD1 (reinterpret_cast<ArtMethod*>(8u))
+#define METHOD2 (reinterpret_cast<ArtMethod*>(16u))
+#define METHOD3 (reinterpret_cast<ArtMethod*>(24u))
+
+// Fake OatQuickMethodHeader pointers, built the same way from distinct constant addresses.
+#define METHOD_HEADER1 (reinterpret_cast<OatQuickMethodHeader*>(128u))
+#define METHOD_HEADER2 (reinterpret_cast<OatQuickMethodHeader*>(136u))
+#define METHOD_HEADER3 (reinterpret_cast<OatQuickMethodHeader*>(144u))
+
+TEST_F(CHATest, CHACheckDependency) {  // Add, query and remove dependency pairs.
+  ClassHierarchyAnalysis cha;
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);  // Remove*() REQUIRES(cha_lock_).
+  // A fresh analysis reports no dependents for any of the three fake methods.
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  // After one AddDependency on METHOD1, only METHOD1 reports a (METHOD2, METHOD_HEADER2) pair.
+  cha.AddDependency(METHOD1, METHOD2, METHOD_HEADER2);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  auto dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);  // NOTE(review): no null assertion before ->.
+  ASSERT_EQ(dependents->at(0).first, METHOD2);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+  // A second dependency on METHOD1 shows up after the first one (the order is asserted).
+  cha.AddDependency(METHOD1, METHOD3, METHOD_HEADER3);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 2u);
+  ASSERT_EQ(dependents->at(0).first, METHOD2);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+  ASSERT_EQ(dependents->at(1).first, METHOD3);
+  ASSERT_EQ(dependents->at(1).second, METHOD_HEADER3);
+  // Removing by header METHOD_HEADER2 drops just the (METHOD2, METHOD_HEADER2) pair.
+  std::unordered_set<OatQuickMethodHeader*> headers;
+  headers.insert(METHOD_HEADER2);
+  cha.RemoveDependentsWithMethodHeaders(headers);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD3);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER3);
+  // METHOD2 gets its own entry; METHOD1 still holds its one remaining pair.
+  cha.AddDependency(METHOD2, METHOD1, METHOD_HEADER1);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  dependents = cha.GetDependents(METHOD2);
+  ASSERT_EQ(dependents->size(), 1u);
+  // Removing METHOD1's last pair makes its lookup return nullptr; METHOD2 is unaffected.
+  headers.insert(METHOD_HEADER3);
+  cha.RemoveDependentsWithMethodHeaders(headers);
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD2);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD1);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER1);
+  // RemoveDependencyFor(METHOD2) drops METHOD2's entry, so all three lookups return nullptr.
+  cha.RemoveDependencyFor(METHOD2);
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+}
+
+}  // namespace art
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 5399dc5..6c27bc6 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -282,7 +282,7 @@
       return false;
     }
 
-    ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(fid);
     if (f == nullptr) {
       return false;
     }
@@ -313,7 +313,7 @@
   bool CheckMethodAndSig(ScopedObjectAccess& soa, jobject jobj, jclass jc,
                          jmethodID mid, Primitive::Type type, InvokeType invoke)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
@@ -362,7 +362,7 @@
   bool CheckStaticFieldID(ScopedObjectAccess& soa, jclass java_class, jfieldID fid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
-    ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(fid);
     if (f == nullptr) {
       return false;
     }
@@ -385,7 +385,7 @@
    */
   bool CheckStaticMethod(ScopedObjectAccess& soa, jclass java_class, jmethodID mid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
@@ -407,7 +407,7 @@
    */
   bool CheckVirtualMethod(ScopedObjectAccess& soa, jobject java_object, jmethodID mid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
@@ -577,9 +577,8 @@
     return true;
   }
 
-  bool CheckConstructor(ScopedObjectAccess& soa, jmethodID mid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* method = soa.DecodeMethod(mid);
+  bool CheckConstructor(jmethodID mid) REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* method = jni::DecodeArtMethod(mid);
     if (method == nullptr) {
       AbortF("expected non-null constructor");
       return false;
@@ -682,7 +681,7 @@
     if (!is_static && !CheckInstanceFieldID(soa, obj, fid)) {
       return false;
     }
-    ArtField* field = soa.DecodeField(fid);
+    ArtField* field = jni::DecodeArtField(fid);
     DCHECK(field != nullptr);  // Already checked by Check.
     if (is_static != field->IsStatic()) {
       AbortF("attempt to access %s field %s: %p",
@@ -844,9 +843,9 @@
       case 'c':  // jclass
         return CheckInstance(soa, kClass, arg.c, false);
       case 'f':  // jfieldID
-        return CheckFieldID(soa, arg.f) != nullptr;
+        return CheckFieldID(arg.f) != nullptr;
       case 'm':  // jmethodID
-        return CheckMethodID(soa, arg.m) != nullptr;
+        return CheckMethodID(arg.m) != nullptr;
       case 'r':  // release int
         return CheckReleaseMode(arg.r);
       case 's':  // jstring
@@ -868,7 +867,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     CHECK(args_p != nullptr);
     VarArgs args(args_p->Clone());
-    ArtMethod* m = CheckMethodID(soa, args.GetMethodID());
+    ArtMethod* m = CheckMethodID(args.GetMethodID());
     if (m == nullptr) {
       return false;
     }
@@ -962,7 +961,7 @@
       }
       case 'f': {  // jfieldID
         jfieldID fid = arg.f;
-        ArtField* f = soa.DecodeField(fid);
+        ArtField* f = jni::DecodeArtField(fid);
         *msg += ArtField::PrettyField(f);
         if (!entry) {
           StringAppendF(msg, " (%p)", fid);
@@ -971,7 +970,7 @@
       }
       case 'm': {  // jmethodID
         jmethodID mid = arg.m;
-        ArtMethod* m = soa.DecodeMethod(mid);
+        ArtMethod* m = jni::DecodeArtMethod(mid);
         *msg += ArtMethod::PrettyMethod(m);
         if (!entry) {
           StringAppendF(msg, " (%p)", mid);
@@ -981,7 +980,7 @@
       case '.': {
         const VarArgs* va = arg.va;
         VarArgs args(va->Clone());
-        ArtMethod* m = soa.DecodeMethod(args.GetMethodID());
+        ArtMethod* m = jni::DecodeArtMethod(args.GetMethodID());
         uint32_t len;
         const char* shorty = m->GetShorty(&len);
         CHECK_GE(len, 1u);
@@ -1147,13 +1146,12 @@
     return true;
   }
 
-  ArtField* CheckFieldID(ScopedObjectAccess& soa, jfieldID fid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+  ArtField* CheckFieldID(jfieldID fid) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (fid == nullptr) {
       AbortF("jfieldID was NULL");
       return nullptr;
     }
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     // TODO: Better check here.
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f->GetDeclaringClass().Ptr())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
@@ -1163,13 +1161,12 @@
     return f;
   }
 
-  ArtMethod* CheckMethodID(ScopedObjectAccess& soa, jmethodID mid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+  ArtMethod* CheckMethodID(jmethodID mid) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (mid == nullptr) {
       AbortF("jmethodID was NULL");
       return nullptr;
     }
-    ArtMethod* m = soa.DecodeMethod(mid);
+    ArtMethod* m = jni::DecodeArtMethod(mid);
     // TODO: Better check here.
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m->GetDeclaringClass())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
@@ -2005,7 +2002,7 @@
     VarArgs rest(mid, vargs);
     JniValueType args[4] = {{.E = env}, {.c = c}, {.m = mid}, {.va = &rest}};
     if (sc.Check(soa, true, "Ecm.", args) && sc.CheckInstantiableNonArray(soa, c) &&
-        sc.CheckConstructor(soa, mid)) {
+        sc.CheckConstructor(mid)) {
       JniValueType result;
       result.L = baseEnv(env)->NewObjectV(env, c, mid, vargs);
       if (sc.Check(soa, false, "L", &result)) {
@@ -2029,7 +2026,7 @@
     VarArgs rest(mid, vargs);
     JniValueType args[4] = {{.E = env}, {.c = c}, {.m = mid}, {.va = &rest}};
     if (sc.Check(soa, true, "Ecm.", args) && sc.CheckInstantiableNonArray(soa, c) &&
-        sc.CheckConstructor(soa, mid)) {
+        sc.CheckConstructor(mid)) {
       JniValueType result;
       result.L = baseEnv(env)->NewObjectA(env, c, mid, vargs);
       if (sc.Check(soa, false, "L", &result)) {
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 350855b..7005c29 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -65,14 +65,15 @@
   return array_class.Ptr();
 }
 
-inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx, ArtMethod* referrer) {
+inline mirror::String* ClassLinker::ResolveString(dex::StringIndex string_idx,
+                                                  ArtMethod* referrer) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());;
+  DCHECK_LT(string_idx.index_, declaring_class->GetDexFile().NumStringIds());
   ObjPtr<mirror::String> string =
         mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
-                                           string_idx,
+                                           string_idx.index_,
                                            mirror::DexCache::kDexCacheStringCacheSize).Read();
   if (UNLIKELY(string == nullptr)) {
     StackHandleScope<1> hs(Thread::Current());
@@ -86,7 +87,7 @@
   return string.Ptr();
 }
 
-inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtMethod* referrer) {
+inline mirror::Class* ClassLinker::ResolveType(dex::TypeIndex type_idx, ArtMethod* referrer) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> resolved_type =
       referrer->GetDexCacheResolvedType(type_idx, image_pointer_size_);
@@ -103,7 +104,7 @@
   return resolved_type.Ptr();
 }
 
-inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtField* referrer) {
+inline mirror::Class* ClassLinker::ResolveType(dex::TypeIndex type_idx, ArtField* referrer) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
   ObjPtr<mirror::DexCache> dex_cache_ptr = declaring_class->GetDexCache();
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 4823caa..e9f5978 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -40,6 +40,7 @@
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "base/value_object.h"
+#include "cha.h"
 #include "class_linker-inl.h"
 #include "class_table-inl.h"
 #include "compiler_callbacks.h"
@@ -63,10 +64,12 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jit/offline_profiling_info.h"
+#include "jni_internal.h"
 #include "leb128.h"
 #include "linear_alloc.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
@@ -94,6 +97,7 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "trace.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
@@ -136,10 +140,22 @@
   return exception_init_method != nullptr;
 }
 
-// Helper for ThrowEarlierClassFailure. Throws the stored error.
-static void HandleEarlierVerifyError(Thread* self, ClassLinker* class_linker, ObjPtr<mirror::Class> c)
+static mirror::Object* GetVerifyError(ObjPtr<mirror::Class> c)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::Object> obj = c->GetVerifyError();
+  ObjPtr<mirror::ClassExt> ext(c->GetExtData());  // The error is read from c's ClassExt.
+  if (ext == nullptr) {
+    return nullptr;  // No ClassExt on c: report no stored error.
+  } else {
+    return ext->GetVerifyError();
+  }
+}
+
+// Helper for ThrowEarlierClassFailure. Throws the stored error.
+static void HandleEarlierVerifyError(Thread* self,
+                                     ClassLinker* class_linker,
+                                     ObjPtr<mirror::Class> c)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Object> obj = GetVerifyError(c);
   DCHECK(obj != nullptr);
   self->AssertNoPendingException();
   if (obj->IsClass()) {
@@ -173,8 +189,8 @@
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
     std::string extra;
-    if (c->GetVerifyError() != nullptr) {
-      ObjPtr<mirror::Object> verify_error = c->GetVerifyError();
+    if (GetVerifyError(c) != nullptr) {
+      ObjPtr<mirror::Object> verify_error = GetVerifyError(c);
       if (verify_error->IsClass()) {
         extra = mirror::Class::PrettyDescriptor(verify_error->AsClass());
       } else {
@@ -192,11 +208,14 @@
     ObjPtr<mirror::Throwable> pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
     self->SetException(pre_allocated);
   } else {
-    if (c->GetVerifyError() != nullptr) {
+    if (GetVerifyError(c) != nullptr) {
       // Rethrow stored error.
       HandleEarlierVerifyError(self, this, c);
     }
-    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+    // TODO This might be wrong if we hit an OOME while allocating the ClassExt. In that case we
+    // might have meant to go down the earlier if statement with the original error but it got
+    // swallowed by the OOM so we end up here.
+    if (GetVerifyError(c) == nullptr || wrap_in_no_class_def) {
       // If there isn't a recorded earlier error, or this is a repeat throw from initialization,
       // the top-level exception must be a NoClassDefFoundError. The potentially already pending
       // exception will be a cause.
@@ -223,6 +242,12 @@
   ScopedLocalRef<jthrowable> cause(env, env->ExceptionOccurred());
   CHECK(cause.get() != nullptr);
 
+  // Boot classpath classes should not fail initialization.
+  if (!Runtime::Current()->IsAotCompiler()) {
+    std::string tmp;
+    CHECK(klass->GetClassLoader() != nullptr) << klass->GetDescriptor(&tmp);
+  }
+
   env->ExceptionClear();
   bool is_error = env->IsInstanceOf(cause.get(), WellKnownClasses::java_lang_Error);
   env->Throw(cause.get());
@@ -378,8 +403,8 @@
   CHECK(java_lang_Class.Get() != nullptr);
   mirror::Class::SetClassClass(java_lang_Class.Get());
   java_lang_Class->SetClass(java_lang_Class.Get());
-  if (kUseBakerOrBrooksReadBarrier) {
-    java_lang_Class->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    java_lang_Class->AssertReadBarrierState();
   }
   java_lang_Class->SetClassSize(class_class_size);
   java_lang_Class->SetPrimitiveType(Primitive::kPrimNot);
@@ -458,6 +483,9 @@
   SetClassRoot(kJavaLangString, java_lang_String.Get());
   SetClassRoot(kJavaLangRefReference, java_lang_ref_Reference.Get());
 
+  // Fill in the empty iftable. Needs to be done after the kObjectArrayClass root is set.
+  java_lang_Object->SetIfTable(AllocIfTable(self, 0));
+
   // Setup the primitive type classes.
   SetClassRoot(kPrimitiveBoolean, CreatePrimitiveClass(self, Primitive::kPrimBoolean));
   SetClassRoot(kPrimitiveByte, CreatePrimitiveClass(self, Primitive::kPrimByte));
@@ -495,6 +523,14 @@
   java_lang_DexCache->SetObjectSize(mirror::DexCache::InstanceSize());
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusResolved, self);
 
+
+  // Setup dalvik.system.ClassExt
+  Handle<mirror::Class> dalvik_system_ClassExt(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::ClassExt::ClassSize(image_pointer_size_))));
+  SetClassRoot(kDalvikSystemClassExt, dalvik_system_ClassExt.Get());
+  mirror::ClassExt::SetClass(dalvik_system_ClassExt.Get());
+  mirror::Class::SetStatus(dalvik_system_ClassExt, mirror::Class::kStatusResolved, self);
+
   // Set up array classes for string, field, method
   Handle<mirror::Class> object_array_string(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(),
@@ -540,7 +576,7 @@
     quick_to_interpreter_bridge_trampoline_ = GetQuickToInterpreterBridge();
   }
 
-  // Object, String and DexCache need to be rerun through FindSystemClass to finish init
+  // Object, String, ClassExt and DexCache need to be rerun through FindSystemClass to finish init
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusNotReady, self);
   CheckSystemClass(self, java_lang_Object, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
@@ -549,6 +585,9 @@
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
   CheckSystemClass(self, java_lang_DexCache, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
+  mirror::Class::SetStatus(dalvik_system_ClassExt, mirror::Class::kStatusNotReady, self);
+  CheckSystemClass(self, dalvik_system_ClassExt, "Ldalvik/system/ClassExt;");
+  CHECK_EQ(dalvik_system_ClassExt->GetObjectSize(), mirror::ClassExt::InstanceSize());
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
   SetClassRoot(kBooleanArrayClass, FindSystemClass(self, "[Z"));
@@ -699,10 +738,12 @@
     const DexFile& dex_file = java_lang_Object->GetDexFile();
     const DexFile::TypeId* void_type_id = dex_file.FindTypeId("V");
     CHECK(void_type_id != nullptr);
-    uint16_t void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
+    dex::TypeIndex void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
     // Now we resolve void type so the dex cache contains it. We use java.lang.Object class
     // as referrer so the used dex cache is core's one.
-    ObjPtr<mirror::Class> resolved_type = ResolveType(dex_file, void_type_idx, java_lang_Object.Get());
+    ObjPtr<mirror::Class> resolved_type = ResolveType(dex_file,
+                                                      void_type_idx,
+                                                      java_lang_Object.Get());
     CHECK_EQ(resolved_type, GetClassRoot(kPrimitiveVoid));
     self->AssertNoPendingException();
   }
@@ -781,125 +822,6 @@
   }
 }
 
-static void SanityCheckArtMethod(ArtMethod* m,
-                                 ObjPtr<mirror::Class> expected_class,
-                                 const std::vector<gc::space::ImageSpace*>& spaces)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (m->IsRuntimeMethod()) {
-    ObjPtr<mirror::Class> declaring_class = m->GetDeclaringClassUnchecked();
-    CHECK(declaring_class == nullptr) << declaring_class << " " << m->PrettyMethod();
-  } else if (m->IsCopied()) {
-    CHECK(m->GetDeclaringClass() != nullptr) << m->PrettyMethod();
-  } else if (expected_class != nullptr) {
-    CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << m->PrettyMethod();
-  }
-  if (!spaces.empty()) {
-    bool contains = false;
-    for (gc::space::ImageSpace* space : spaces) {
-      auto& header = space->GetImageHeader();
-      size_t offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
-
-      const ImageSection& methods = header.GetMethodsSection();
-      contains = contains || methods.Contains(offset);
-
-      const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
-      contains = contains || runtime_methods.Contains(offset);
-    }
-    CHECK(contains) << m << " not found";
-  }
-}
-
-static void SanityCheckArtMethodPointerArray(ObjPtr<mirror::PointerArray> arr,
-                                             ObjPtr<mirror::Class> expected_class,
-                                             PointerSize pointer_size,
-                                             const std::vector<gc::space::ImageSpace*>& spaces)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  CHECK(arr != nullptr);
-  for (int32_t j = 0; j < arr->GetLength(); ++j) {
-    auto* method = arr->GetElementPtrSize<ArtMethod*>(j, pointer_size);
-    // expected_class == null means we are a dex cache.
-    if (expected_class != nullptr) {
-      CHECK(method != nullptr);
-    }
-    if (method != nullptr) {
-      SanityCheckArtMethod(method, expected_class, spaces);
-    }
-  }
-}
-
-static void SanityCheckArtMethodPointerArray(ArtMethod** arr,
-                                             size_t size,
-                                             PointerSize pointer_size,
-                                             const std::vector<gc::space::ImageSpace*>& spaces)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  CHECK_EQ(arr != nullptr, size != 0u);
-  if (arr != nullptr) {
-    bool contains = false;
-    for (auto space : spaces) {
-      auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
-      if (space->GetImageHeader().GetImageSection(
-          ImageHeader::kSectionDexCacheArrays).Contains(offset)) {
-        contains = true;
-        break;
-      }
-    }
-    CHECK(contains);
-  }
-  for (size_t j = 0; j < size; ++j) {
-    ArtMethod* method = mirror::DexCache::GetElementPtrSize(arr, j, pointer_size);
-    // expected_class == null means we are a dex cache.
-    if (method != nullptr) {
-      SanityCheckArtMethod(method, nullptr, spaces);
-    }
-  }
-}
-
-static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg ATTRIBUTE_UNUSED)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK(obj != nullptr);
-  CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj;
-  CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj;
-  if (obj->IsClass()) {
-    auto klass = obj->AsClass();
-    for (ArtField& field : klass->GetIFields()) {
-      CHECK_EQ(field.GetDeclaringClass(), klass);
-    }
-    for (ArtField& field : klass->GetSFields()) {
-      CHECK_EQ(field.GetDeclaringClass(), klass);
-    }
-    auto* runtime = Runtime::Current();
-    auto image_spaces = runtime->GetHeap()->GetBootImageSpaces();
-    auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : klass->GetMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_spaces);
-    }
-    auto* vtable = klass->GetVTable();
-    if (vtable != nullptr) {
-      SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
-    }
-    if (klass->ShouldHaveImt()) {
-      ImTable* imt = klass->GetImt(pointer_size);
-      for (size_t i = 0; i < ImTable::kSize; ++i) {
-        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
-      }
-    }
-    if (klass->ShouldHaveEmbeddedVTable()) {
-      for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
-        SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
-      }
-    }
-    auto* iftable = klass->GetIfTable();
-    if (iftable != nullptr) {
-      for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-        if (iftable->GetMethodArrayCount(i) > 0) {
-          SanityCheckArtMethodPointerArray(
-              iftable->GetMethodArray(i), nullptr, pointer_size, image_spaces);
-        }
-      }
-    }
-  }
-}
-
 // Set image methods' entry point to interpreter.
 class SetInterpreterEntrypointArtMethodVisitor : public ArtMethodVisitor {
  public:
@@ -1066,6 +988,7 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
   mirror::EmulatedStackFrame::SetClass(GetClassRoot(kDalvikSystemEmulatedStackFrame));
+  mirror::ClassExt::SetClass(GetClassRoot(kDalvikSystemClassExt));
 
   for (gc::space::ImageSpace* image_space : spaces) {
     // Boot class loader, use a null handle.
@@ -1096,13 +1019,12 @@
            class_loader->GetClass();
 }
 
-static mirror::String* GetDexPathListElementName(ScopedObjectAccessUnchecked& soa,
-                                                 ObjPtr<mirror::Object> element)
+static mirror::String* GetDexPathListElementName(ObjPtr<mirror::Object> element)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ArtField* const dex_file_name_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
   DCHECK(dex_file_field != nullptr);
   DCHECK(dex_file_name_field != nullptr);
   DCHECK(element != nullptr);
@@ -1126,9 +1048,9 @@
   DCHECK(error_msg != nullptr);
   ScopedObjectAccessUnchecked soa(Thread::Current());
   ArtField* const dex_path_list_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
   ArtField* const dex_elements_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
   CHECK(dex_path_list_field != nullptr);
   CHECK(dex_elements_field != nullptr);
   while (!ClassLinker::IsBootClassLoader(soa, class_loader)) {
@@ -1155,7 +1077,7 @@
             *error_msg = StringPrintf("Null dex element at index %d", i);
             return false;
           }
-          ObjPtr<mirror::String> const name = GetDexPathListElementName(soa, element);
+          ObjPtr<mirror::String> const name = GetDexPathListElementName(element);
           if (name == nullptr) {
             *error_msg = StringPrintf("Null name for dex element at index %d", i);
             return false;
@@ -1570,6 +1492,153 @@
   return true;
 }
 
+// Helper class for ArtMethod checks when adding an image. Keeps all required functionality
+// together and caches each boot image space's begin address and method sections up front.
+class ImageSanityChecks FINAL {
+ public:  // CheckObjects() and CheckPointerArray() each build a temporary checker per call.
+  static void CheckObjects(gc::Heap* heap, ClassLinker* class_linker)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ImageSanityChecks isc(heap, class_linker);
+    heap->VisitObjects(ImageSanityChecks::SanityCheckObjectsCallback, &isc);
+  }
+  // Checks a raw ArtMethod* array of `size` entries via the ArtMethod** overload below.
+  static void CheckPointerArray(gc::Heap* heap,
+                                ClassLinker* class_linker,
+                                ArtMethod** arr,
+                                size_t size)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ImageSanityChecks isc(heap, class_linker);
+    isc.SanityCheckArtMethodPointerArray(arr, size);
+  }
+  // VisitObjects callback; `arg` must be the ImageSanityChecks* passed in by CheckObjects().
+  static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(obj != nullptr);
+    CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj;
+    CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj;
+    if (obj->IsClass()) {
+      ImageSanityChecks* isc = reinterpret_cast<ImageSanityChecks*>(arg);
+      // Every instance and static field must name this class as its declaring class.
+      auto klass = obj->AsClass();
+      for (ArtField& field : klass->GetIFields()) {
+        CHECK_EQ(field.GetDeclaringClass(), klass);
+      }
+      for (ArtField& field : klass->GetSFields()) {
+        CHECK_EQ(field.GetDeclaringClass(), klass);
+      }
+      const auto pointer_size = isc->pointer_size_;
+      for (auto& m : klass->GetMethods(pointer_size)) {
+        isc->SanityCheckArtMethod(&m, klass);
+      }
+      auto* vtable = klass->GetVTable();
+      if (vtable != nullptr) {
+        isc->SanityCheckArtMethodPointerArray(vtable, nullptr);
+      }
+      if (klass->ShouldHaveImt()) {
+        ImTable* imt = klass->GetImt(pointer_size);
+        for (size_t i = 0; i < ImTable::kSize; ++i) {
+          isc->SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr);
+        }
+      }
+      if (klass->ShouldHaveEmbeddedVTable()) {
+        for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
+          isc->SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr);
+        }
+      }
+      mirror::IfTable* iftable = klass->GetIfTable();  // NOTE(review): no null check below.
+      for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+        if (iftable->GetMethodArrayCount(i) > 0) {
+          isc->SanityCheckArtMethodPointerArray(iftable->GetMethodArray(i), nullptr);
+        }
+      }
+    }
+  }
+
+ private:
+  ImageSanityChecks(gc::Heap* heap, ClassLinker* class_linker)
+     :  spaces_(heap->GetBootImageSpaces()),  // Held by reference, not copied.
+        pointer_size_(class_linker->GetImagePointerSize()) {
+    space_begin_.reserve(spaces_.size());
+    method_sections_.reserve(spaces_.size());
+    runtime_method_sections_.reserve(spaces_.size());
+    for (gc::space::ImageSpace* space : spaces_) {
+      space_begin_.push_back(space->Begin());
+      auto& header = space->GetImageHeader();
+      method_sections_.push_back(&header.GetMethodsSection());
+      runtime_method_sections_.push_back(&header.GetRuntimeMethodsSection());
+    }
+  }
+  // CHECKs m's declaring class; with boot image spaces present, m must lie in a method section.
+  void SanityCheckArtMethod(ArtMethod* m, ObjPtr<mirror::Class> expected_class)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (m->IsRuntimeMethod()) {
+      ObjPtr<mirror::Class> declaring_class = m->GetDeclaringClassUnchecked();
+      CHECK(declaring_class == nullptr) << declaring_class << " " << m->PrettyMethod();
+    } else if (m->IsCopied()) {
+      CHECK(m->GetDeclaringClass() != nullptr) << m->PrettyMethod();
+    } else if (expected_class != nullptr) {
+      CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << m->PrettyMethod();
+    }
+    if (!spaces_.empty()) {
+      bool contains = false;
+      for (size_t i = 0; !contains && i != space_begin_.size(); ++i) {  // Stop at first hit.
+        const size_t offset = reinterpret_cast<uint8_t*>(m) - space_begin_[i];
+        contains = method_sections_[i]->Contains(offset) ||
+            runtime_method_sections_[i]->Contains(offset);
+      }
+      CHECK(contains) << m << " not found";
+    }
+  }
+  // Checks each entry of a PointerArray; entries may be null only if expected_class is null.
+  void SanityCheckArtMethodPointerArray(ObjPtr<mirror::PointerArray> arr,
+                                        ObjPtr<mirror::Class> expected_class)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    CHECK(arr != nullptr);
+    for (int32_t j = 0; j < arr->GetLength(); ++j) {
+      auto* method = arr->GetElementPtrSize<ArtMethod*>(j, pointer_size_);
+      // expected_class == null means we are a dex cache.
+      if (expected_class != nullptr) {
+        CHECK(method != nullptr);
+      }
+      if (method != nullptr) {
+        SanityCheckArtMethod(method, expected_class);
+      }
+    }
+  }
+  // Checks a raw ArtMethod* array: it must lie in a kSectionDexCacheArrays section of a space.
+  void SanityCheckArtMethodPointerArray(ArtMethod** arr, size_t size)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    CHECK_EQ(arr != nullptr, size != 0u);
+    if (arr != nullptr) {
+      bool contains = false;
+      for (auto space : spaces_) {
+        auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
+        if (space->GetImageHeader().GetImageSection(
+            ImageHeader::kSectionDexCacheArrays).Contains(offset)) {
+          contains = true;
+          break;
+        }
+      }
+      CHECK(contains);
+    }
+    for (size_t j = 0; j < size; ++j) {
+      ArtMethod* method = mirror::DexCache::GetElementPtrSize(arr, j, pointer_size_);
+      // No expected class for dex cache entries, hence the nullptr argument; nulls are skipped.
+      if (method != nullptr) {
+        SanityCheckArtMethod(method, nullptr);
+      }
+    }
+  }
+
+  const std::vector<gc::space::ImageSpace*>& spaces_;  // From heap->GetBootImageSpaces().
+  const PointerSize pointer_size_;
+
+  // Per-space caches, index-aligned with spaces_ (filled in the constructor).
+  std::vector<const uint8_t*> space_begin_;
+  std::vector<const ImageSection*> method_sections_;
+  std::vector<const ImageSection*> runtime_method_sections_;
+};
+
 bool ClassLinker::AddImageSpace(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1660,10 +1729,10 @@
       }
     } else {
       if (kSanityCheckObjects) {
-        SanityCheckArtMethodPointerArray(h_dex_cache->GetResolvedMethods(),
-                                         h_dex_cache->NumResolvedMethods(),
-                                         image_pointer_size_,
-                                         heap->GetBootImageSpaces());
+        ImageSanityChecks::CheckPointerArray(heap,
+                                             this,
+                                             h_dex_cache->GetResolvedMethods(),
+                                             h_dex_cache->NumResolvedMethods());
       }
       // Register dex files, keep track of existing ones that are conflicts.
       AppendToBootClassPath(*dex_file.get(), h_dex_cache);
@@ -1705,7 +1774,7 @@
         ObjPtr<mirror::Object> element = elements->GetWithoutChecks(i);
         if (element != nullptr) {
           // If we are somewhere in the middle of the array, there may be nulls at the end.
-          loader_dex_file_names.push_back(GetDexPathListElementName(soa, element));
+          loader_dex_file_names.push_back(GetDexPathListElementName(element));
         }
       }
       // Ignore the number of image dex files since we are adding those to the class loader anyways.
@@ -1748,7 +1817,7 @@
       }
     }
     if (!app_image) {
-      heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
+      ImageSanityChecks::CheckObjects(heap, this);
     }
   }
 
@@ -1863,7 +1932,7 @@
     boot_class_table_.VisitRoots(buffered_visitor);
 
     // If tracing is enabled, then mark all the class loaders to prevent unloading.
-    if (tracing_enabled) {
+    if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) {
       for (const ClassLoaderData& data : class_loaders_) {
         GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root)));
         root.VisitRoot(visitor, RootInfo(kRootVMInternal));
@@ -2329,12 +2398,12 @@
   return ClassPathEntry(nullptr, nullptr);
 }
 
-bool ClassLinker::FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                             Thread* self,
-                                             const char* descriptor,
-                                             size_t hash,
-                                             Handle<mirror::ClassLoader> class_loader,
-                                             ObjPtr<mirror::Class>* result) {
+bool ClassLinker::FindClassInBaseDexClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                                Thread* self,
+                                                const char* descriptor,
+                                                size_t hash,
+                                                Handle<mirror::ClassLoader> class_loader,
+                                                ObjPtr<mirror::Class>* result) {
   // Termination case: boot class-loader.
   if (IsBootClassLoader(soa, class_loader.Get())) {
     // The boot class loader, search the boot class path.
@@ -2364,14 +2433,24 @@
   // Unsupported class-loader?
   if (soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader) !=
       class_loader->GetClass()) {
-    *result = nullptr;
-    return false;
+    // PathClassLoader is the most common case, so it's the one we check first. For secondary dex
+    // files, we also check DexClassLoader here.
+    if (soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_DexClassLoader) !=
+        class_loader->GetClass()) {
+      *result = nullptr;
+      return false;
+    }
   }
 
   // Handles as RegisterDexFile may allocate dex caches (and cause thread suspension).
   StackHandleScope<4> hs(self);
   Handle<mirror::ClassLoader> h_parent(hs.NewHandle(class_loader->GetParent()));
-  bool recursive_result = FindClassInPathClassLoader(soa, self, descriptor, hash, h_parent, result);
+  bool recursive_result = FindClassInBaseDexClassLoader(soa,
+                                                        self,
+                                                        descriptor,
+                                                        hash,
+                                                        h_parent,
+                                                        result);
 
   if (!recursive_result) {
     // Something wrong up the chain.
@@ -2387,16 +2466,17 @@
   // Handle as if this is the child PathClassLoader.
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
         GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
@@ -2493,14 +2573,14 @@
   } else {
     ScopedObjectAccessUnchecked soa(self);
     ObjPtr<mirror::Class> cp_klass;
-    if (FindClassInPathClassLoader(soa, self, descriptor, hash, class_loader, &cp_klass)) {
+    if (FindClassInBaseDexClassLoader(soa, self, descriptor, hash, class_loader, &cp_klass)) {
       // The chain was understood. So the value in cp_klass is either the class we were looking
       // for, or not found.
       if (cp_klass != nullptr) {
         return cp_klass.Ptr();
       }
-      // TODO: We handle the boot classpath loader in FindClassInPathClassLoader. Try to unify this
-      //       and the branch above. TODO: throw the right exception here.
+      // TODO: We handle the boot classpath loader in FindClassInBaseDexClassLoader. Try to unify
+      //       this and the branch above. TODO: throw the right exception here.
 
       // We'll let the Java-side rediscover all this and throw the exception with the right stack
       // trace.
@@ -2568,6 +2648,8 @@
       klass.Assign(GetClassRoot(kJavaLangRefReference));
     } else if (strcmp(descriptor, "Ljava/lang/DexCache;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangDexCache));
+    } else if (strcmp(descriptor, "Ldalvik/system/ClassExt;") == 0) {
+      klass.Assign(GetClassRoot(kDalvikSystemClassExt));
     }
   }
 
@@ -2787,6 +2869,13 @@
     return true;
   }
 
+  if (runtime->IsFullyDeoptable()) {
+    // We need to be able to deoptimize at any time, so always ignore precompiled code and go to
+    // the interpreter unless quick_code is already in the JIT code cache.
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    return (jit == nullptr) || !jit->GetCodeCache()->ContainsPc(quick_code);
+  }
+
   if (runtime->IsNativeDebuggable()) {
     DCHECK(runtime->UseJitCompilation() && runtime->GetJit()->JitAtFirstUse());
     // If we are doing native debugging, ignore application's AOT code,
@@ -3353,7 +3442,8 @@
 }
 
 mirror::Class* ClassLinker::CreatePrimitiveClass(Thread* self, Primitive::Type type) {
-  ObjPtr<mirror::Class> klass = AllocClass(self, mirror::Class::PrimitiveClassSize(image_pointer_size_));
+  ObjPtr<mirror::Class> klass =
+      AllocClass(self, mirror::Class::PrimitiveClassSize(image_pointer_size_));
   if (UNLIKELY(klass == nullptr)) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -3371,10 +3461,12 @@
   ObjectLock<mirror::Class> lock(self, h_class);
   h_class->SetAccessFlags(kAccPublic | kAccFinal | kAccAbstract);
   h_class->SetPrimitiveType(type);
+  h_class->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
   mirror::Class::SetStatus(h_class, mirror::Class::kStatusInitialized, self);
   const char* descriptor = Primitive::Descriptor(type);
-  ObjPtr<mirror::Class> existing = InsertClass(descriptor, h_class.Get(),
-                                        ComputeModifiedUtf8Hash(descriptor));
+  ObjPtr<mirror::Class> existing = InsertClass(descriptor,
+                                               h_class.Get(),
+                                               ComputeModifiedUtf8Hash(descriptor));
   CHECK(existing == nullptr) << "InitPrimitiveClass(" << type << ") failed";
   return h_class.Get();
 }
@@ -3948,7 +4040,7 @@
 
   const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
   // In case we run without an image there won't be a backing oat file.
-  if (oat_dex_file == nullptr) {
+  if (oat_dex_file == nullptr || oat_dex_file->GetOatFile() == nullptr) {
     return false;
   }
 
@@ -4038,7 +4130,7 @@
     for (; iterator.HasNext(); iterator.Next()) {
       // Ensure exception types are resolved so that they don't need resolution to be delivered,
       // unresolved exception types will be ignored by exception delivery
-      if (iterator.GetHandlerTypeIndex() != DexFile::kDexNoIndex16) {
+      if (iterator.GetHandlerTypeIndex().IsValid()) {
         ObjPtr<mirror::Class> exception_type = ResolveType(iterator.GetHandlerTypeIndex(), method);
         if (exception_type == nullptr) {
           DCHECK(Thread::Current()->IsExceptionPending());
@@ -4073,6 +4165,8 @@
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
   klass->SetName(soa.Decode<mirror::String>(name));
   klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
+  // Object has an empty iftable, copy it for that reason.
+  klass->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
   const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
@@ -4702,7 +4796,7 @@
   const DexFile* dex_file = m->GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(m->GetDexMethodIndex());
   const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
-  uint16_t return_type_idx = proto_id.return_type_idx_;
+  dex::TypeIndex return_type_idx = proto_id.return_type_idx_;
   std::string return_type = dex_file->PrettyType(return_type_idx);
   std::string class_loader = mirror::Object::PrettyTypeOf(m->GetDeclaringClass()->GetClassLoader());
   ThrowWrappedLinkageError(klass.Get(),
@@ -4720,7 +4814,7 @@
                                                    ArtMethod* method,
                                                    ArtMethod* m,
                                                    uint32_t index,
-                                                   uint32_t arg_type_idx)
+                                                   dex::TypeIndex arg_type_idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(Thread::Current()->IsExceptionPending());
   DCHECK(!m->IsProxyMethod());
@@ -4810,7 +4904,7 @@
   }
   for (uint32_t i = 0; i < num_types; ++i) {
     StackHandleScope<1> hs(self);
-    uint32_t param_type_idx = types1->GetTypeItem(i).type_idx_;
+    dex::TypeIndex param_type_idx = types1->GetTypeItem(i).type_idx_;
     Handle<mirror::Class> param_type(hs.NewHandle(
         method1->GetClassFromTypeIndex(param_type_idx, true /* resolve */, pointer_size)));
     if (UNLIKELY(param_type.Get() == nullptr)) {
@@ -4818,7 +4912,7 @@
                                              method1, i, param_type_idx);
       return false;
     }
-    uint32_t other_param_type_idx = types2->GetTypeItem(i).type_idx_;
+    dex::TypeIndex other_param_type_idx = types2->GetTypeItem(i).type_idx_;
     ObjPtr<mirror::Class> other_param_type =
         method2->GetClassFromTypeIndex(other_param_type_idx, true /* resolve */, pointer_size);
     if (UNLIKELY(other_param_type == nullptr)) {
@@ -5049,6 +5143,12 @@
     if (klass->ShouldHaveImt()) {
       klass->SetImt(imt, image_pointer_size_);
     }
+
+    // Update CHA info based on whether we override methods.
+    // Have to do this before setting the class as resolved which allows
+    // instantiation of klass.
+    Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(klass);
+
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5092,6 +5192,11 @@
       }
     }
 
+    // Update CHA info based on whether we override methods.
+    // Have to do this before setting the class as resolved which allows
+    // instantiation of klass.
+    Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(h_new_class);
+
     // This will notify waiters on temp class that saw the not yet resolved class in the
     // class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusRetired, self);
@@ -5302,8 +5407,8 @@
 bool ClassLinker::LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file) {
   CHECK_EQ(mirror::Class::kStatusIdx, klass->GetStatus());
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
-  uint16_t super_class_idx = class_def.superclass_idx_;
-  if (super_class_idx != DexFile::kDexNoIndex16) {
+  dex::TypeIndex super_class_idx = class_def.superclass_idx_;
+  if (super_class_idx.IsValid()) {
     // Check that a class does not inherit from itself directly.
     //
     // TODO: This is a cheap check to detect the straightforward case
@@ -5340,7 +5445,7 @@
   const DexFile::TypeList* interfaces = dex_file.GetInterfacesList(class_def);
   if (interfaces != nullptr) {
     for (size_t i = 0; i < interfaces->Size(); i++) {
-      uint16_t idx = interfaces->GetTypeItem(i).type_idx_;
+      dex::TypeIndex idx = interfaces->GetTypeItem(i).type_idx_;
       ObjPtr<mirror::Class> interface = ResolveType(dex_file, idx, klass.Get());
       if (interface == nullptr) {
         DCHECK(Thread::Current()->IsExceptionPending());
@@ -6333,16 +6438,18 @@
 bool ClassLinker::SetupInterfaceLookupTable(Thread* self, Handle<mirror::Class> klass,
                                             Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   StackHandleScope<1> hs(self);
-  const size_t super_ifcount =
-      klass->HasSuperClass() ? klass->GetSuperClass()->GetIfTableCount() : 0U;
+  const bool has_superclass = klass->HasSuperClass();
+  const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
   const bool have_interfaces = interfaces.Get() != nullptr;
   const size_t num_interfaces =
       have_interfaces ? interfaces->GetLength() : klass->NumDirectInterfaces();
   if (num_interfaces == 0) {
     if (super_ifcount == 0) {
+      if (LIKELY(has_superclass)) {
+        klass->SetIfTable(klass->GetSuperClass()->GetIfTable());
+      }
       // Class implements no interfaces.
       DCHECK_EQ(klass->GetIfTableCount(), 0);
-      DCHECK(klass->GetIfTable() == nullptr);
       return true;
     }
     // Class implements same interfaces as parent, are any of these not marker interfaces?
@@ -6535,7 +6642,7 @@
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
     ObjPtr<mirror::IfTable> if_table = super_class->GetIfTable();
-    if (if_table != nullptr) {
+    if (if_table->Count() != 0) {
       // Ignore copied methods since we will handle these in LinkInterfaceMethods.
       FillIMTFromIfTable(if_table,
                          unimplemented_method,
@@ -7421,7 +7528,7 @@
 }
 
 mirror::String* ClassLinker::ResolveString(const DexFile& dex_file,
-                                           uint32_t string_idx,
+                                           dex::StringIndex string_idx,
                                            Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
   Thread::PoisonObjectPointersIfDebug();
@@ -7437,7 +7544,7 @@
 }
 
 mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
-                                          uint32_t string_idx,
+                                          dex::StringIndex string_idx,
                                           Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
   ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
@@ -7446,7 +7553,8 @@
   }
   uint32_t utf16_length;
   const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
-  ObjPtr<mirror::String> string = intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
+  ObjPtr<mirror::String> string =
+      intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
   if (string != nullptr) {
     dex_cache->SetResolvedString(string_idx, string);
   }
@@ -7454,7 +7562,7 @@
 }
 
 ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(const DexFile& dex_file,
-                                                      uint16_t type_idx,
+                                                      dex::TypeIndex type_idx,
                                                       ObjPtr<mirror::DexCache> dex_cache,
                                                       ObjPtr<mirror::ClassLoader> class_loader) {
   ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
@@ -7480,7 +7588,7 @@
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
-                                        uint16_t type_idx,
+                                        dex::TypeIndex type_idx,
                                         ObjPtr<mirror::Class> referrer) {
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
@@ -7489,7 +7597,7 @@
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
-                                        uint16_t type_idx,
+                                        dex::TypeIndex type_idx,
                                         Handle<mirror::DexCache> dex_cache,
                                         Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
@@ -7883,7 +7991,7 @@
   int32_t i = 0;
   MutableHandle<mirror::Class> param_class = hs.NewHandle<mirror::Class>(nullptr);
   for (; it.HasNext(); it.Next()) {
-    const uint16_t type_idx = it.GetTypeIdx();
+    const dex::TypeIndex type_idx = it.GetTypeIdx();
     param_class.Assign(ResolveType(dex_file, type_idx, dex_cache, class_loader));
     if (param_class.Get() == nullptr) {
       DCHECK(self->IsExceptionPending());
@@ -8077,6 +8185,7 @@
     "[J",
     "[S",
     "[Ljava/lang/StackTraceElement;",
+    "Ldalvik/system/ClassExt;",
   };
   static_assert(arraysize(class_roots_descriptors) == size_t(kClassRootsMax),
                 "Mismatch between class descriptors and class-root enum");
@@ -8096,7 +8205,7 @@
   StackHandleScope<11> hs(self);
 
   ArtField* dex_elements_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
 
   Handle<mirror::Class> dex_elements_class(hs.NewHandle(dex_elements_field->GetType<true>()));
   DCHECK(dex_elements_class.Get() != nullptr);
@@ -8109,13 +8218,13 @@
       hs.NewHandle(dex_elements_class->GetComponentType());
 
   ArtField* element_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   DCHECK_EQ(h_dex_element_class.Get(), element_file_field->GetDeclaringClass());
 
-  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
-  ArtField* file_name_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  ArtField* file_name_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
   DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
   // Fill the elements array.
@@ -8165,7 +8274,7 @@
   DCHECK(h_path_class_loader.Get() != nullptr);
   // Set DexPathList.
   ArtField* path_list_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
   DCHECK(path_list_field != nullptr);
   path_list_field->SetObject<false>(h_path_class_loader.Get(), h_dex_path_list.Get());
 
@@ -8293,10 +8402,10 @@
                                              dex_file->GetBaseLocation(),
                                              dex_file->GetLocationChecksum());
     size_t num_resolved = 0;
-    std::unordered_set<uint16_t> class_set;
+    std::unordered_set<dex::TypeIndex> class_set;
     CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
     for (size_t i = 0; i < num_types; ++i) {
-      ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(i);
+      ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(dex::TypeIndex(i));
       // Filter out null class loader since that is the boot class loader.
       if (klass == nullptr || (ignore_boot_classes && klass->GetClassLoader() == nullptr)) {
         continue;
@@ -8309,17 +8418,16 @@
       }
       ObjPtr<mirror::DexCache> klass_dex_cache = klass->GetDexCache();
       if (klass_dex_cache == dex_cache) {
-        const size_t class_def_idx = klass->GetDexClassDefIndex();
         DCHECK(klass->IsResolved());
-        CHECK_LT(class_def_idx, num_class_defs);
-        class_set.insert(class_def_idx);
+        CHECK_LT(klass->GetDexClassDefIndex(), num_class_defs);
+        class_set.insert(klass->GetDexTypeIndex());
       }
     }
 
     if (!class_set.empty()) {
       auto it = ret.find(resolved_classes);
       if (it != ret.end()) {
-        // Already have the key, union the class def idxs.
+        // Already have the key, union the class type indexes.
         it->AddClasses(class_set.begin(), class_set.end());
       } else {
         resolved_classes.AddClasses(class_set.begin(), class_set.end());
@@ -8362,13 +8470,8 @@
       VLOG(profiler) << "Found opened dex file for " << dex_file->GetLocation() << " with "
                      << info.GetClasses().size() << " classes";
       DCHECK_EQ(dex_file->GetLocationChecksum(), info.GetLocationChecksum());
-      for (uint16_t class_def_idx : info.GetClasses()) {
-        if (class_def_idx >= dex_file->NumClassDefs()) {
-          LOG(WARNING) << "Class def index " << class_def_idx << " >= " << dex_file->NumClassDefs();
-          continue;
-        }
-        const DexFile::TypeId& type_id = dex_file->GetTypeId(
-            dex_file->GetClassDef(class_def_idx).class_idx_);
+      for (dex::TypeIndex type_idx : info.GetClasses()) {
+        const DexFile::TypeId& type_id = dex_file->GetTypeId(type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
         ret.insert(descriptor);
       }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 3248d0e..60755cd 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -32,6 +32,7 @@
 #include "class_table.h"
 #include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "gc_root.h"
 #include "jni.h"
 #include "mirror/class.h"
@@ -128,6 +129,7 @@
     kLongArrayClass,
     kShortArrayClass,
     kJavaLangStackTraceElementArrayClass,
+    kDalvikSystemClassExt,
     kClassRootsMax,
   };
 
@@ -235,18 +237,20 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::String* ResolveString(uint32_t string_idx, ArtMethod* referrer)
+  mirror::String* ResolveString(dex::StringIndex string_idx, ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
-  mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
+  mirror::String* ResolveString(const DexFile& dex_file,
+                                dex::StringIndex string_idx,
                                 Handle<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Find a String with the given index from the DexFile, storing the
   // result in the DexCache if found. Return null if not found.
-  mirror::String* LookupString(const DexFile& dex_file, uint32_t string_idx,
+  mirror::String* LookupString(const DexFile& dex_file,
+                               dex::StringIndex string_idx,
                                Handle<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -254,7 +258,7 @@
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(const DexFile& dex_file,
-                             uint16_t type_idx,
+                             dex::TypeIndex type_idx,
                              ObjPtr<mirror::Class> referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
@@ -262,18 +266,18 @@
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(uint16_t type_idx, ArtMethod* referrer)
+  mirror::Class* ResolveType(dex::TypeIndex type_idx, ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  mirror::Class* ResolveType(uint16_t type_idx, ArtField* referrer)
+  mirror::Class* ResolveType(dex::TypeIndex type_idx, ArtField* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Look up a resolved type with the given ID from the DexFile. The ClassLoader is used to search
   // for the type, since it may be referenced from but not contained within the given DexFile.
   ObjPtr<mirror::Class> LookupResolvedType(const DexFile& dex_file,
-                                           uint16_t type_idx,
+                                           dex::TypeIndex type_idx,
                                            ObjPtr<mirror::DexCache> dex_cache,
                                            ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -283,7 +287,7 @@
   // type, since it may be referenced from but not contained within
   // the given DexFile.
   mirror::Class* ResolveType(const DexFile& dex_file,
-                             uint16_t type_idx,
+                             dex::TypeIndex type_idx,
                              Handle<mirror::DexCache> dex_cache,
                              Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -417,10 +421,10 @@
       REQUIRES(!dex_lock_);
 
   void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* FindDexCache(Thread* self,
@@ -794,17 +798,17 @@
 
   void FixupStaticTrampolines(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Finds a class in the path class loader, loading it if necessary without using JNI. Hash
+  // Finds a class in a Path- or DexClassLoader, loading it if necessary without using JNI. Hash
   // function is supposed to be ComputeModifiedUtf8Hash(descriptor). Returns true if the
   // class-loader chain could be handled, false otherwise, i.e., a non-supported class-loader
   // was encountered while walking the parent chain (currently only BootClassLoader and
   // PathClassLoader are supported).
-  bool FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                  Thread* self,
-                                  const char* descriptor,
-                                  size_t hash,
-                                  Handle<mirror::ClassLoader> class_loader,
-                                  ObjPtr<mirror::Class>* result)
+  bool FindClassInBaseDexClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                     Thread* self,
+                                     const char* descriptor,
+                                     size_t hash,
+                                     Handle<mirror::ClassLoader> class_loader,
+                                     ObjPtr<mirror::Class>* result)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -1200,7 +1204,7 @@
   friend struct CompilationHelper;  // For Compile in ImageTest.
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
-  friend class VMClassLoader;  // for LookupClass and FindClassInPathClassLoader.
+  friend class VMClassLoader;  // for LookupClass and FindClassInBaseDexClassLoader.
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
   ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for DexLock, and RegisterDexFileLocked
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 5878bf3..7c06ffe 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -25,11 +25,13 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "experimental_flags.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/heap.h"
 #include "mirror/accessible_object.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/dex_cache.h"
 #include "mirror/emulated_stack_frame.h"
 #include "mirror/executable.h"
@@ -100,7 +102,8 @@
     EXPECT_EQ(0U, primitive->NumDirectInterfaces());
     EXPECT_FALSE(primitive->HasVTable());
     EXPECT_EQ(0, primitive->GetIfTableCount());
-    EXPECT_TRUE(primitive->GetIfTable() == nullptr);
+    EXPECT_TRUE(primitive->GetIfTable() != nullptr);
+    EXPECT_EQ(primitive->GetIfTable()->Count(), 0u);
     EXPECT_EQ(kAccPublic | kAccFinal | kAccAbstract, primitive->GetAccessFlags());
   }
 
@@ -427,7 +430,7 @@
     }
     // Verify all the types referenced by this file
     for (size_t i = 0; i < dex.NumTypeIds(); i++) {
-      const DexFile::TypeId& type_id = dex.GetTypeId(i);
+      const DexFile::TypeId& type_id = dex.GetTypeId(dex::TypeIndex(i));
       const char* descriptor = dex.GetTypeDescriptor(type_id);
       AssertDexFileClass(class_loader, descriptor);
     }
@@ -586,6 +589,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, ext_data_), "extData");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ifields_), "iFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, iftable_), "ifTable");
     addOffset(OFFSETOF_MEMBER(mirror::Class, methods_), "methods");
@@ -603,12 +607,20 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, sfields_), "sFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
     addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_offset_), "virtualMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
 };
 
+struct ClassExtOffsets : public CheckOffsets<mirror::ClassExt> {
+  ClassExtOffsets() : CheckOffsets<mirror::ClassExt>(false, "Ldalvik/system/ClassExt;") {
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_dex_caches_), "obsoleteDexCaches");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_methods_), "obsoleteMethods");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, original_dex_cache_), "originalDexCache");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, verify_error_), "verifyError");
+  }
+};
+
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
@@ -735,8 +747,8 @@
   MethodHandleImplOffsets() : CheckOffsets<mirror::MethodHandleImpl>(
       false, "Ljava/lang/invoke/MethodHandle;") {
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, art_field_or_method_), "artFieldOrMethod");
-    addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, as_type_cache_), "asTypeCache");
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, handle_kind_), "handleKind");
+    addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, nominal_type_), "nominalType");
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, method_type_), "type");
   }
 };
@@ -757,6 +769,7 @@
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_TRUE(ObjectOffsets().Check());
   EXPECT_TRUE(ClassOffsets().Check());
+  EXPECT_TRUE(ClassExtOffsets().Check());
   EXPECT_TRUE(StringOffsets().Check());
   EXPECT_TRUE(ThrowableOffsets().Check());
   EXPECT_TRUE(StackTraceElementOffsets().Check());
@@ -882,7 +895,7 @@
       hs.NewHandle(soa.Decode<mirror::ClassLoader>(LoadDex("MyClass"))));
   AssertNonExistentClass("LMyClass;");
   ObjPtr<mirror::Class> klass = class_linker_->FindClass(soa.Self(), "LMyClass;", class_loader);
-  uint32_t type_idx = klass->GetClassDef()->class_idx_;
+  dex::TypeIndex type_idx = klass->GetClassDef()->class_idx_;
   ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
   const DexFile& dex_file = klass->GetDexFile();
   EXPECT_OBJ_PTR_EQ(dex_cache->GetResolvedType(type_idx), klass);
@@ -1145,7 +1158,7 @@
   ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;", kRuntimePointerSize);
   const DexFile::TypeId* type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(type_id != nullptr);
-  uint32_t type_idx = dex_file->GetIndexForTypeId(*type_id);
+  dex::TypeIndex type_idx = dex_file->GetIndexForTypeId(*type_id);
   mirror::Class* uninit = ResolveVerifyAndClinit(type_idx, clinit, soa.Self(), true, false);
   EXPECT_TRUE(uninit != nullptr);
   EXPECT_FALSE(uninit->IsInitialized());
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index b44104e..0fcce6b 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -170,7 +170,7 @@
     const DexFile* dex_file = ObjPtr<mirror::DexCache>::DownCast(obj)->GetDexFile();
     if (dex_file != nullptr && dex_file->GetOatDexFile() != nullptr) {
       const OatFile* oat_file = dex_file->GetOatDexFile()->GetOatFile();
-      if (!oat_file->GetBssGcRoots().empty()) {
+      if (oat_file != nullptr && !oat_file->GetBssGcRoots().empty()) {
         InsertOatFileLocked(oat_file);  // Ignore return value.
       }
     }
diff --git a/runtime/class_table.h b/runtime/class_table.h
index bc9eaf4..558c144 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -48,7 +48,7 @@
     uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
-        NO_THREAD_SAFETY_ANALYSIS;;
+        NO_THREAD_SAFETY_ANALYSIS;
     // Same descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 5409fcb..8226e60 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -520,17 +520,17 @@
 
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field!= nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-        GetObject(dex_path_list);
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
     if (dex_elements_obj != nullptr) {
@@ -572,6 +572,29 @@
   return ret;
 }
 
+jobject CommonRuntimeTestImpl::LoadMultiDex(const char* first_dex_name,
+                                            const char* second_dex_name) {
+  std::vector<std::unique_ptr<const DexFile>> first_dex_files = OpenTestDexFiles(first_dex_name);
+  std::vector<std::unique_ptr<const DexFile>> second_dex_files = OpenTestDexFiles(second_dex_name);
+  std::vector<const DexFile*> class_path;
+  CHECK_NE(0U, first_dex_files.size());
+  CHECK_NE(0U, second_dex_files.size());
+  for (auto& dex_file : first_dex_files) {
+    class_path.push_back(dex_file.get());
+    loaded_dex_files_.push_back(std::move(dex_file));
+  }
+  for (auto& dex_file : second_dex_files) {
+    class_path.push_back(dex_file.get());
+    loaded_dex_files_.push_back(std::move(dex_file));
+  }
+
+  Thread* self = Thread::Current();
+  jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
+                                                                                     class_path);
+  self->SetClassLoaderOverride(class_loader);
+  return class_loader;
+}
+
 jobject CommonRuntimeTestImpl::LoadDex(const char* dex_name) {
   std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles(dex_name);
   std::vector<const DexFile*> class_path;
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 92934c6..17e3729 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -52,7 +52,7 @@
 
   ScratchFile(const ScratchFile& other, const char* suffix);
 
-  explicit ScratchFile(ScratchFile&& other);
+  ScratchFile(ScratchFile&& other);
 
   ScratchFile& operator=(ScratchFile&& other);
 
@@ -133,6 +133,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   jobject LoadDex(const char* dex_name) REQUIRES_SHARED(Locks::mutator_lock_);
+  jobject LoadMultiDex(const char* first_dex_name, const char* second_dex_name)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   std::string android_data_;
   std::string dalvik_cache_;
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 0251776..9f0dbbb 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -813,13 +813,11 @@
 
 void ThrowWrongMethodTypeException(mirror::MethodType* callee_type,
                                    mirror::MethodType* callsite_type) {
-  // TODO(narayan): Should we provide more detail here ? The RI doesn't bother.
-  UNUSED(callee_type);
-  UNUSED(callsite_type);
-
   ThrowException("Ljava/lang/invoke/WrongMethodTypeException;",
                  nullptr,
-                 "Invalid method type for signature polymorphic call");
+                 StringPrintf("Expected %s but was %s",
+                              callee_type->PrettyDescriptor().c_str(),
+                              callsite_type->PrettyDescriptor().c_str()).c_str());
 }
 
 }  // namespace art
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 00dedef..806653a 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -47,6 +47,7 @@
   virtual bool IsRelocationPossible() = 0;
 
   virtual verifier::VerifierDeps* GetVerifierDeps() const = 0;
+  virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {}
 
   bool IsBootImage() {
     return mode_ == CallbackMode::kCompileBootImage;
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
index dc197c1..6e3e1d8 100644
--- a/runtime/compiler_filter.cc
+++ b/runtime/compiler_filter.cc
@@ -33,6 +33,7 @@
     case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
+    case CompilerFilter::kLayoutProfile:
     case CompilerFilter::kEverythingProfile:
     case CompilerFilter::kEverything: return true;
   }
@@ -52,6 +53,7 @@
     case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
+    case CompilerFilter::kLayoutProfile:
     case CompilerFilter::kEverythingProfile:
     case CompilerFilter::kEverything: return true;
   }
@@ -71,6 +73,7 @@
     case CompilerFilter::kTime:
     case CompilerFilter::kSpeedProfile:
     case CompilerFilter::kSpeed:
+    case CompilerFilter::kLayoutProfile:
     case CompilerFilter::kEverythingProfile:
     case CompilerFilter::kEverything: return true;
   }
@@ -97,6 +100,7 @@
     case CompilerFilter::kVerifyProfile:
     case CompilerFilter::kSpaceProfile:
     case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kLayoutProfile:
     case CompilerFilter::kEverythingProfile: return true;
   }
   UNREACHABLE();
@@ -121,6 +125,7 @@
       return CompilerFilter::kSpace;
 
     case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kLayoutProfile:
       return CompilerFilter::kSpeed;
 
     case CompilerFilter::kEverythingProfile:
@@ -146,6 +151,7 @@
     case CompilerFilter::kTime: return "time";
     case CompilerFilter::kSpeedProfile: return "speed-profile";
     case CompilerFilter::kSpeed: return "speed";
+    case CompilerFilter::kLayoutProfile: return "layout-profile";
     case CompilerFilter::kEverythingProfile: return "everything-profile";
     case CompilerFilter::kEverything: return "everything";
   }
@@ -173,6 +179,8 @@
     *filter = kSpeed;
   } else if (strcmp(option, "speed-profile") == 0) {
     *filter = kSpeedProfile;
+  } else if (strcmp(option, "layout-profile") == 0) {
+    *filter = kLayoutProfile;
   } else if (strcmp(option, "everything") == 0) {
     *filter = kEverything;
   } else if (strcmp(option, "everything-profile") == 0) {
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index 37631cc..781d43a 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -39,6 +39,7 @@
     kSpace,               // Maximize space savings.
     kBalanced,            // Good performance return on compilation investment.
     kSpeedProfile,        // Maximize runtime performance based on profile.
+    kLayoutProfile,       // Temporary filter for dexlayout. Will be merged with kSpeedProfile.
     kSpeed,               // Maximize runtime performance.
     kEverythingProfile,   // Compile everything capable of being compiled based on profile.
     kEverything,          // Compile everything capable of being compiled.
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1da888e..dc2ae2e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -39,6 +39,7 @@
 #include "handle_scope.h"
 #include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
+#include "jni_internal.h"
 #include "jvalue-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -2007,7 +2008,7 @@
   mirror::Object* thread_object = gRegistry->Get<mirror::Object*>(thread_id, &error);
   CHECK(thread_object != nullptr) << error;
   ArtField* java_lang_Thread_name_field =
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
   ObjPtr<mirror::String> s(java_lang_Thread_name_field->GetObject(thread_object)->AsString());
   if (s != nullptr) {
     *name = s->ToModifiedUtf8();
@@ -2032,7 +2033,7 @@
   } else if (error == JDWP::ERR_NONE) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(WellKnownClasses::java_lang_Thread);
     CHECK(c != nullptr);
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group);
     CHECK(f != nullptr);
     ObjPtr<mirror::Object> group = f->GetObject(thread_object);
     CHECK(group != nullptr);
@@ -2074,7 +2075,7 @@
     return error;
   }
   ScopedAssertNoThreadSuspension ants("Debugger: GetThreadGroupName");
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_name);
   CHECK(f != nullptr);
   ObjPtr<mirror::String> s = f->GetObject(thread_group)->AsString();
 
@@ -2093,7 +2094,7 @@
   ObjPtr<mirror::Object> parent;
   {
     ScopedAssertNoThreadSuspension ants("Debugger: GetThreadGroupParent");
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_parent);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_parent);
     CHECK(f != nullptr);
     parent = f->GetObject(thread_group);
   }
@@ -2102,13 +2103,13 @@
   return JDWP::ERR_NONE;
 }
 
-static void GetChildThreadGroups(ScopedObjectAccessUnchecked& soa, mirror::Object* thread_group,
+static void GetChildThreadGroups(mirror::Object* thread_group,
                                  std::vector<JDWP::ObjectId>* child_thread_group_ids)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   CHECK(thread_group != nullptr);
 
   // Get the int "ngroups" count of this thread group...
-  ArtField* ngroups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
+  ArtField* ngroups_field = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
   CHECK(ngroups_field != nullptr);
   const int32_t size = ngroups_field->GetInt(thread_group);
   if (size == 0) {
@@ -2116,7 +2117,7 @@
   }
 
   // Get the ThreadGroup[] "groups" out of this thread group...
-  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  ArtField* groups_field = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_groups);
   ObjPtr<mirror::Object> groups_array = groups_field->GetObject(thread_group);
 
   CHECK(groups_array != nullptr);
@@ -2154,7 +2155,7 @@
   // Add child thread groups.
   {
     std::vector<JDWP::ObjectId> child_thread_groups_ids;
-    GetChildThreadGroups(soa, thread_group, &child_thread_groups_ids);
+    GetChildThreadGroups(thread_group, &child_thread_groups_ids);
     expandBufAdd4BE(pReply, child_thread_groups_ids.size());
     for (JDWP::ObjectId child_thread_group_id : child_thread_groups_ids) {
       expandBufAddObjectId(pReply, child_thread_group_id);
@@ -2166,7 +2167,7 @@
 
 JDWP::ObjectId Dbg::GetSystemThreadGroupId() {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
   ObjPtr<mirror::Object> group = f->GetObject(f->GetDeclaringClass());
   return gRegistry->Add(group);
 }
@@ -2256,14 +2257,13 @@
   return JDWP::ERR_NONE;
 }
 
-static bool IsInDesiredThreadGroup(ScopedObjectAccessUnchecked& soa,
-                                   mirror::Object* desired_thread_group, mirror::Object* peer)
+static bool IsInDesiredThreadGroup(mirror::Object* desired_thread_group, mirror::Object* peer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Do we want threads from all thread groups?
   if (desired_thread_group == nullptr) {
     return true;
   }
-  ArtField* thread_group_field = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+  ArtField* thread_group_field = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group);
   DCHECK(thread_group_field != nullptr);
   ObjPtr<mirror::Object> group = thread_group_field->GetObject(peer);
   return (group == desired_thread_group);
@@ -2296,7 +2296,7 @@
       // Doing so might help us report ZOMBIE threads too.
       continue;
     }
-    if (IsInDesiredThreadGroup(soa, thread_group, peer)) {
+    if (IsInDesiredThreadGroup(thread_group, peer)) {
       thread_ids->push_back(gRegistry->Add(peer));
     }
   }
@@ -4093,7 +4093,7 @@
 
   // Invoke the method.
   ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(pReq->receiver.Read()));
-  JValue result = InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(m),
+  JValue result = InvokeWithJValues(soa, ref.get(), jni::EncodeArtMethod(m),
                                     reinterpret_cast<jvalue*>(pReq->arg_values.get()));
 
   // Prepare JDWP ids for the reply.
@@ -4371,7 +4371,7 @@
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
+    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName()));
     size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0;
     const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr;
     bool is_compressed = (name.Get() != nullptr) ? name->IsCompressed() : false;
@@ -5117,13 +5117,11 @@
 }
 
 ArtMethod* DeoptimizationRequest::Method() const {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  return soa.DecodeMethod(method_);
+  return jni::DecodeArtMethod(method_);
 }
 
 void DeoptimizationRequest::SetMethod(ArtMethod* m) {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  method_ = soa.EncodeMethod(m);
+  method_ = jni::EncodeArtMethod(m);
 }
 
 void Dbg::VisitRoots(RootVisitor* visitor) {
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
index d717ec0..b0c4597 100644
--- a/runtime/dex2oat_environment_test.h
+++ b/runtime/dex2oat_environment_test.h
@@ -136,7 +136,9 @@
       + "/core.art";
   }
 
-  bool GetCachedImageFile(/*out*/std::string* image, std::string* error_msg) const {
+  bool GetCachedImageFile(const std::string& image_location,
+                          /*out*/std::string* image,
+                          /*out*/std::string* error_msg) const {
     std::string cache;
     bool have_android_data;
     bool dalvik_cache_exists;
@@ -151,7 +153,14 @@
       *error_msg = "Failed to create dalvik cache";
       return false;
     }
-    return GetDalvikCacheFilename(GetImageLocation().c_str(), cache.c_str(), image, error_msg);
+    return GetDalvikCacheFilename(image_location.c_str(), cache.c_str(), image, error_msg);
+  }
+
+  // Returns the path to an image location whose contents differ from the
+  // image at GetImageLocation(). This is used for testing mismatched
+  // image checksums in the oat_file_assistant_tests.
+  std::string GetImageLocation2() const {
+    return GetImageDirectory() + "/core-npic.art";
   }
 
   std::string GetDexSrc1() const {
diff --git a/runtime/dex_cache_resolved_classes.h b/runtime/dex_cache_resolved_classes.h
index 0febbed..f53ca4a 100644
--- a/runtime/dex_cache_resolved_classes.h
+++ b/runtime/dex_cache_resolved_classes.h
@@ -21,6 +21,8 @@
 #include <unordered_set>
 #include <vector>
 
+#include "dex_file_types.h"
+
 namespace art {
 
 // Data structure for passing around which classes belonging to a dex cache / dex file are resolved.
@@ -59,7 +61,7 @@
     return location_checksum_;
   }
 
-  const std::unordered_set<uint16_t>& GetClasses() const {
+  const std::unordered_set<dex::TypeIndex>& GetClasses() const {
     return classes_;
   }
 
@@ -68,7 +70,7 @@
   const std::string base_location_;
   const uint32_t location_checksum_;
   // Array of resolved class def indexes.
-  mutable std::unordered_set<uint16_t> classes_;
+  mutable std::unordered_set<dex::TypeIndex> classes_;
 };
 
 inline bool operator<(const DexCacheResolvedClasses& a, const DexCacheResolvedClasses& b) {
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 621b2c5..e884e39 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -43,9 +43,9 @@
   return GetStringDataAndUtf16Length(string_id, &ignored);
 }
 
-inline const char* DexFile::StringDataAndUtf16LengthByIdx(uint32_t idx,
+inline const char* DexFile::StringDataAndUtf16LengthByIdx(dex::StringIndex idx,
                                                           uint32_t* utf16_length) const {
-  if (idx == kDexNoIndex) {
+  if (!idx.IsValid()) {
     *utf16_length = 0;
     return nullptr;
   }
@@ -53,17 +53,17 @@
   return GetStringDataAndUtf16Length(string_id, utf16_length);
 }
 
-inline const char* DexFile::StringDataByIdx(uint32_t idx) const {
+inline const char* DexFile::StringDataByIdx(dex::StringIndex idx) const {
   uint32_t unicode_length;
   return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
 }
 
-inline const char* DexFile::StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
+inline const char* DexFile::StringByTypeIdx(dex::TypeIndex idx, uint32_t* unicode_length) const {
   const TypeId& type_id = GetTypeId(idx);
   return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
 }
 
-inline const char* DexFile::StringByTypeIdx(uint32_t idx) const {
+inline const char* DexFile::StringByTypeIdx(dex::TypeIndex idx) const {
   const TypeId& type_id = GetTypeId(idx);
   return StringDataByIdx(type_id.descriptor_idx_);
 }
@@ -130,8 +130,8 @@
       (RoundUp(reinterpret_cast<uintptr_t>(insns_end_), 4)) + offset;
 }
 
-static inline bool DexFileStringEquals(const DexFile* df1, uint32_t sidx1,
-                                       const DexFile* df2, uint32_t sidx2) {
+static inline bool DexFileStringEquals(const DexFile* df1, dex::StringIndex sidx1,
+                                       const DexFile* df2, dex::StringIndex sidx2) {
   uint32_t s1_len;  // Note: utf16 length != mutf8 length.
   const char* s1_data = df1->StringDataAndUtf16LengthByIdx(sidx1, &s1_len);
   uint32_t s2_len;
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 2ef7509..aa8fb38 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -26,6 +26,7 @@
 
 #include <memory>
 #include <sstream>
+#include <type_traits>
 
 #include "base/enums.h"
 #include "base/file_magic.h"
@@ -44,6 +45,11 @@
 
 namespace art {
 
+static_assert(sizeof(dex::StringIndex) == sizeof(uint32_t), "StringIndex size is wrong");
+static_assert(std::is_trivially_copyable<dex::StringIndex>::value, "StringIndex not trivial");
+static_assert(sizeof(dex::TypeIndex) == sizeof(uint16_t), "TypeIndex size is wrong");
+static_assert(std::is_trivially_copyable<dex::TypeIndex>::value, "TypeIndex not trivial");
+
 static constexpr OatDexFile* kNoOatDexFile = nullptr;
 
 const char* DexFile::kClassesDex = "classes.dex";
@@ -550,7 +556,7 @@
   return atoi(version);
 }
 
-const DexFile::ClassDef* DexFile::FindClassDef(uint16_t type_idx) const {
+const DexFile::ClassDef* DexFile::FindClassDef(dex::TypeIndex type_idx) const {
   size_t num_class_defs = NumClassDefs();
   // Fast path for rare no class defs case.
   if (num_class_defs == 0) {
@@ -597,9 +603,9 @@
                                              const DexFile::StringId& name,
                                              const DexFile::TypeId& type) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
-  const uint16_t class_idx = GetIndexForTypeId(declaring_klass);
-  const uint32_t name_idx = GetIndexForStringId(name);
-  const uint16_t type_idx = GetIndexForTypeId(type);
+  const dex::TypeIndex class_idx = GetIndexForTypeId(declaring_klass);
+  const dex::StringIndex name_idx = GetIndexForStringId(name);
+  const dex::TypeIndex type_idx = GetIndexForTypeId(type);
   int32_t lo = 0;
   int32_t hi = NumFieldIds() - 1;
   while (hi >= lo) {
@@ -632,8 +638,8 @@
                                                const DexFile::StringId& name,
                                                const DexFile::ProtoId& signature) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
-  const uint16_t class_idx = GetIndexForTypeId(declaring_klass);
-  const uint32_t name_idx = GetIndexForStringId(name);
+  const dex::TypeIndex class_idx = GetIndexForTypeId(declaring_klass);
+  const dex::StringIndex name_idx = GetIndexForStringId(name);
   const uint16_t proto_idx = GetIndexForProtoId(signature);
   int32_t lo = 0;
   int32_t hi = NumMethodIds() - 1;
@@ -668,7 +674,7 @@
   int32_t hi = NumStringIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
-    const DexFile::StringId& str_id = GetStringId(mid);
+    const DexFile::StringId& str_id = GetStringId(dex::StringIndex(mid));
     const char* str = GetStringData(str_id);
     int compare = CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(string, str);
     if (compare > 0) {
@@ -687,7 +693,7 @@
   int32_t hi = NumTypeIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
-    const TypeId& type_id = GetTypeId(mid);
+    const TypeId& type_id = GetTypeId(dex::TypeIndex(mid));
     const DexFile::StringId& str_id = GetStringId(type_id.descriptor_idx_);
     const char* str = GetStringData(str_id);
     int compare = CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(string, str);
@@ -707,7 +713,7 @@
   int32_t hi = NumStringIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
-    const DexFile::StringId& str_id = GetStringId(mid);
+    const DexFile::StringId& str_id = GetStringId(dex::StringIndex(mid));
     const char* str = GetStringData(str_id);
     int compare = CompareModifiedUtf8ToUtf16AsCodePointValues(str, string, length);
     if (compare > 0) {
@@ -721,12 +727,12 @@
   return nullptr;
 }
 
-const DexFile::TypeId* DexFile::FindTypeId(uint32_t string_idx) const {
+const DexFile::TypeId* DexFile::FindTypeId(dex::StringIndex string_idx) const {
   int32_t lo = 0;
   int32_t hi = NumTypeIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
-    const TypeId& type_id = GetTypeId(mid);
+    const TypeId& type_id = GetTypeId(dex::TypeIndex(mid));
     if (string_idx > type_id.descriptor_idx_) {
       lo = mid + 1;
     } else if (string_idx < type_id.descriptor_idx_) {
@@ -738,20 +744,20 @@
   return nullptr;
 }
 
-const DexFile::ProtoId* DexFile::FindProtoId(uint16_t return_type_idx,
-                                             const uint16_t* signature_type_idxs,
+const DexFile::ProtoId* DexFile::FindProtoId(dex::TypeIndex return_type_idx,
+                                             const dex::TypeIndex* signature_type_idxs,
                                              uint32_t signature_length) const {
   int32_t lo = 0;
   int32_t hi = NumProtoIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
     const DexFile::ProtoId& proto = GetProtoId(mid);
-    int compare = return_type_idx - proto.return_type_idx_;
+    int compare = return_type_idx.index_ - proto.return_type_idx_.index_;
     if (compare == 0) {
       DexFileParameterIterator it(*this, proto);
       size_t i = 0;
       while (it.HasNext() && i < signature_length && compare == 0) {
-        compare = signature_type_idxs[i] - it.GetTypeIdx();
+        compare = signature_type_idxs[i].index_ - it.GetTypeIdx().index_;
         it.Next();
         i++;
       }
@@ -775,8 +781,9 @@
 }
 
 // Given a signature place the type ids into the given vector
-bool DexFile::CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
-                             std::vector<uint16_t>* param_type_idxs) const {
+bool DexFile::CreateTypeList(const StringPiece& signature,
+                             dex::TypeIndex* return_type_idx,
+                             std::vector<dex::TypeIndex>* param_type_idxs) const {
   if (signature[0] != '(') {
     return false;
   }
@@ -813,7 +820,7 @@
     if (type_id == nullptr) {
       return false;
     }
-    uint16_t type_idx = GetIndexForTypeId(*type_id);
+    dex::TypeIndex type_idx = GetIndexForTypeId(*type_id);
     if (!process_return) {
       param_type_idxs->push_back(type_idx);
     } else {
@@ -825,8 +832,8 @@
 }
 
 const Signature DexFile::CreateSignature(const StringPiece& signature) const {
-  uint16_t return_type_idx;
-  std::vector<uint16_t> param_type_indices;
+  dex::TypeIndex return_type_idx;
+  std::vector<dex::TypeIndex> param_type_indices;
   bool success = CreateTypeList(signature, &return_type_idx, &param_type_indices);
   if (!success) {
     return Signature::NoSignature();
@@ -907,7 +914,7 @@
     }
     uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
     const char* descriptor = it.GetDescriptor();
-    local_in_reg[arg_reg].name_ = StringDataByIdx(name_idx);
+    local_in_reg[arg_reg].name_ = StringDataByIdx(dex::StringIndex(name_idx));
     local_in_reg[arg_reg].descriptor_ = descriptor;
     local_in_reg[arg_reg].signature_ = nullptr;
     local_in_reg[arg_reg].start_address_ = 0;
@@ -970,9 +977,10 @@
           local_cb(context, local_in_reg[reg]);
         }
 
-        local_in_reg[reg].name_ = StringDataByIdx(name_idx);
-        local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
-        local_in_reg[reg].signature_ = StringDataByIdx(signature_idx);
+        local_in_reg[reg].name_ = StringDataByIdx(dex::StringIndex(name_idx));
+        local_in_reg[reg].descriptor_ =
+            StringByTypeIdx(dex::TypeIndex(dchecked_integral_cast<uint16_t>(descriptor_idx)));
+        local_in_reg[reg].signature_ = StringDataByIdx(dex::StringIndex(signature_idx));
         local_in_reg[reg].start_address_ = address;
         local_in_reg[reg].reg_ = reg;
         local_in_reg[reg].is_live_ = true;
@@ -1074,7 +1082,7 @@
         break;
       case DBG_SET_FILE: {
         uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
-        entry.source_file_ = StringDataByIdx(name_idx);
+        entry.source_file_ = StringDataByIdx(dex::StringIndex(name_idx));
         break;
       }
       default: {
@@ -1225,9 +1233,9 @@
   return result;
 }
 
-std::string DexFile::PrettyType(uint32_t type_idx) const {
-  if (type_idx >= NumTypeIds()) {
-    return StringPrintf("<<invalid-type-idx-%d>>", type_idx);
+std::string DexFile::PrettyType(dex::TypeIndex type_idx) const {
+  if (type_idx.index_ >= NumTypeIds()) {
+    return StringPrintf("<<invalid-type-idx-%d>>", type_idx.index_);
   }
   const DexFile::TypeId& type_id = GetTypeId(type_idx);
   return PrettyDescriptor(GetTypeDescriptor(type_id));
@@ -1457,14 +1465,14 @@
 
 void CatchHandlerIterator::Next() {
   if (remaining_count_ > 0) {
-    handler_.type_idx_ = DecodeUnsignedLeb128(&current_data_);
+    handler_.type_idx_ = dex::TypeIndex(DecodeUnsignedLeb128(&current_data_));
     handler_.address_  = DecodeUnsignedLeb128(&current_data_);
     remaining_count_--;
     return;
   }
 
   if (catch_all_) {
-    handler_.type_idx_ = DexFile::kDexNoIndex16;
+    handler_.type_idx_ = dex::TypeIndex(DexFile::kDexNoIndex16);
     handler_.address_  = DecodeUnsignedLeb128(&current_data_);
     catch_all_ = false;
     return;
@@ -1474,4 +1482,18 @@
   remaining_count_ = -1;
 }
 
+namespace dex {
+
+std::ostream& operator<<(std::ostream& os, const StringIndex& index) {
+  // Rendered as "StringIndex[<raw value>]".
+  return os << "StringIndex[" << index.index_ << "]";
+}
+
+std::ostream& operator<<(std::ostream& os, const TypeIndex& index) {
+  // Rendered as "TypeIndex[<raw value>]".
+  return os << "TypeIndex[" << index.index_ << "]";
+}
+
+}  // namespace dex
+
 }  // namespace art
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index da9fa50..250795b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -23,6 +23,7 @@
 
 #include "base/logging.h"
 #include "base/value_object.h"
+#include "dex_file_types.h"
 #include "globals.h"
 #include "invoke_type.h"
 #include "jni.h"
@@ -151,7 +152,7 @@
 
   // Raw type_id_item.
   struct TypeId {
-    uint32_t descriptor_idx_;  // index into string_ids
+    dex::StringIndex descriptor_idx_;  // index into string_ids
 
    private:
     DISALLOW_COPY_AND_ASSIGN(TypeId);
@@ -159,44 +160,44 @@
 
   // Raw field_id_item.
   struct FieldId {
-    uint16_t class_idx_;  // index into type_ids_ array for defining class
-    uint16_t type_idx_;  // index into type_ids_ array for field type
-    uint32_t name_idx_;  // index into string_ids_ array for field name
+    dex::TypeIndex class_idx_;   // index into type_ids_ array for defining class
+    dex::TypeIndex type_idx_;    // index into type_ids_ array for field type
+    dex::StringIndex name_idx_;  // index into string_ids_ array for field name
 
    private:
     DISALLOW_COPY_AND_ASSIGN(FieldId);
   };
 
-  // Raw method_id_item.
-  struct MethodId {
-    uint16_t class_idx_;  // index into type_ids_ array for defining class
-    uint16_t proto_idx_;  // index into proto_ids_ array for method prototype
-    uint32_t name_idx_;  // index into string_ids_ array for method name
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(MethodId);
-  };
-
   // Raw proto_id_item.
   struct ProtoId {
-    uint32_t shorty_idx_;  // index into string_ids array for shorty descriptor
-    uint16_t return_type_idx_;  // index into type_ids array for return type
-    uint16_t pad_;             // padding = 0
-    uint32_t parameters_off_;  // file offset to type_list for parameter types
+    dex::StringIndex shorty_idx_;     // index into string_ids array for shorty descriptor
+    dex::TypeIndex return_type_idx_;  // index into type_ids array for return type
+    uint16_t pad_;                    // padding = 0
+    uint32_t parameters_off_;         // file offset to type_list for parameter types
 
    private:
     DISALLOW_COPY_AND_ASSIGN(ProtoId);
   };
 
+  // Raw method_id_item.
+  struct MethodId {
+    dex::TypeIndex class_idx_;   // index into type_ids_ array for defining class
+    uint16_t proto_idx_;         // index into proto_ids_ array for method prototype
+    dex::StringIndex name_idx_;  // index into string_ids_ array for method name
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(MethodId);
+  };
+
   // Raw class_def_item.
   struct ClassDef {
-    uint16_t class_idx_;  // index into type_ids_ array for this class
+    dex::TypeIndex class_idx_;  // index into type_ids_ array for this class
     uint16_t pad1_;  // padding = 0
     uint32_t access_flags_;
-    uint16_t superclass_idx_;  // index into type_ids_ array for superclass
+    dex::TypeIndex superclass_idx_;  // index into type_ids_ array for superclass
     uint16_t pad2_;  // padding = 0
     uint32_t interfaces_off_;  // file offset to TypeList
-    uint32_t source_file_idx_;  // index into string_ids_ for source file name
+    dex::StringIndex source_file_idx_;  // index into string_ids_ for source file name
     uint32_t annotations_off_;  // file offset to annotations_directory_item
     uint32_t class_data_off_;  // file offset to class_data_item
     uint32_t static_values_off_;  // file offset to EncodedArray
@@ -225,7 +226,7 @@
 
   // Raw type_item.
   struct TypeItem {
-    uint16_t type_idx_;  // index into type_ids section
+    dex::TypeIndex type_idx_;  // index into type_ids section
 
    private:
     DISALLOW_COPY_AND_ASSIGN(TypeItem);
@@ -500,15 +501,15 @@
   }
 
   // Returns the StringId at the specified index.
-  const StringId& GetStringId(uint32_t idx) const {
-    DCHECK_LT(idx, NumStringIds()) << GetLocation();
-    return string_ids_[idx];
+  const StringId& GetStringId(dex::StringIndex idx) const {
+    DCHECK_LT(idx.index_, NumStringIds()) << GetLocation();
+    return string_ids_[idx.index_];
   }
 
-  uint32_t GetIndexForStringId(const StringId& string_id) const {
+  dex::StringIndex GetIndexForStringId(const StringId& string_id) const {
     CHECK_GE(&string_id, string_ids_) << GetLocation();
     CHECK_LT(&string_id, string_ids_ + header_->string_ids_size_) << GetLocation();
-    return &string_id - string_ids_;
+    return dex::StringIndex(&string_id - string_ids_);
   }
 
   int32_t GetStringLength(const StringId& string_id) const;
@@ -521,9 +522,9 @@
   const char* GetStringData(const StringId& string_id) const;
 
   // Index version of GetStringDataAndUtf16Length.
-  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const;
+  const char* StringDataAndUtf16LengthByIdx(dex::StringIndex idx, uint32_t* utf16_length) const;
 
-  const char* StringDataByIdx(uint32_t idx) const;
+  const char* StringDataByIdx(dex::StringIndex idx) const;
 
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
@@ -540,29 +541,29 @@
   }
 
   // Returns the TypeId at the specified index.
-  const TypeId& GetTypeId(uint32_t idx) const {
-    DCHECK_LT(idx, NumTypeIds()) << GetLocation();
-    return type_ids_[idx];
+  const TypeId& GetTypeId(dex::TypeIndex idx) const {
+    DCHECK_LT(idx.index_, NumTypeIds()) << GetLocation();
+    return type_ids_[idx.index_];
   }
 
-  uint16_t GetIndexForTypeId(const TypeId& type_id) const {
+  dex::TypeIndex GetIndexForTypeId(const TypeId& type_id) const {
     CHECK_GE(&type_id, type_ids_) << GetLocation();
     CHECK_LT(&type_id, type_ids_ + header_->type_ids_size_) << GetLocation();
     size_t result = &type_id - type_ids_;
     DCHECK_LT(result, 65536U) << GetLocation();
-    return static_cast<uint16_t>(result);
+    return dex::TypeIndex(static_cast<uint16_t>(result));
   }
 
   // Get the descriptor string associated with a given type index.
-  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const;
+  const char* StringByTypeIdx(dex::TypeIndex idx, uint32_t* unicode_length) const;
 
-  const char* StringByTypeIdx(uint32_t idx) const;
+  const char* StringByTypeIdx(dex::TypeIndex idx) const;
 
   // Returns the type descriptor string of a type id.
   const char* GetTypeDescriptor(const TypeId& type_id) const;
 
   // Looks up a type for the given string index
-  const TypeId* FindTypeId(uint32_t string_idx) const;
+  const TypeId* FindTypeId(dex::StringIndex string_idx) const;
 
   // Returns the number of field identifiers in the .dex file.
   size_t NumFieldIds() const {
@@ -671,7 +672,7 @@
   const char* GetClassDescriptor(const ClassDef& class_def) const;
 
   // Looks up a class definition by its type index.
-  const ClassDef* FindClassDef(uint16_t type_idx) const;
+  const ClassDef* FindClassDef(dex::TypeIndex type_idx) const;
 
   const TypeList* GetInterfacesList(const ClassDef& class_def) const {
     if (class_def.interfaces_off_ == 0) {
@@ -711,7 +712,7 @@
   }
 
   // Returns the ProtoId at the specified index.
-  const ProtoId& GetProtoId(uint32_t idx) const {
+  const ProtoId& GetProtoId(uint16_t idx) const {
     DCHECK_LT(idx, NumProtoIds()) << GetLocation();
     return proto_ids_[idx];
   }
@@ -723,16 +724,18 @@
   }
 
   // Looks up a proto id for a given return type and signature type list
-  const ProtoId* FindProtoId(uint16_t return_type_idx,
-                             const uint16_t* signature_type_idxs, uint32_t signature_length) const;
-  const ProtoId* FindProtoId(uint16_t return_type_idx,
-                             const std::vector<uint16_t>& signature_type_idxs) const {
+  const ProtoId* FindProtoId(dex::TypeIndex return_type_idx,
+                             const dex::TypeIndex* signature_type_idxs,
+                             uint32_t signature_length) const;
+  const ProtoId* FindProtoId(dex::TypeIndex return_type_idx,
+                             const std::vector<dex::TypeIndex>& signature_type_idxs) const {
     return FindProtoId(return_type_idx, &signature_type_idxs[0], signature_type_idxs.size());
   }
 
   // Given a signature place the type ids into the given vector, returns true on success
-  bool CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
-                      std::vector<uint16_t>* param_type_idxs) const;
+  bool CreateTypeList(const StringPiece& signature,
+                      dex::TypeIndex* return_type_idx,
+                      std::vector<dex::TypeIndex>* param_type_idxs) const;
 
   // Create a Signature from the given string signature or return Signature::NoSignature if not
   // possible.
@@ -960,7 +963,7 @@
                                void* context) const;
 
   const char* GetSourceFile(const ClassDef& class_def) const {
-    if (class_def.source_file_idx_ == 0xffffffff) {
+    if (!class_def.source_file_idx_.IsValid()) {
       return nullptr;
     } else {
       return StringDataByIdx(class_def.source_file_idx_);
@@ -1010,6 +1013,11 @@
     return oat_dex_file_;
   }
 
+  // Used by oat writer. Usable on a const DexFile because oat_dex_file_ is declared mutable.
+  void SetOatDexFile(OatDexFile* oat_dex_file) const {
+    oat_dex_file_ = oat_dex_file;
+  }
+
   // Utility methods for reading integral values from a buffer.
   static int32_t ReadSignedInt(const uint8_t* ptr, int zwidth);
   static uint32_t ReadUnsignedInt(const uint8_t* ptr, int zwidth, bool fill_on_right);
@@ -1021,7 +1029,7 @@
   // Returns a human-readable form of the field at an index.
   std::string PrettyField(uint32_t field_idx, bool with_type = true) const;
   // Returns a human-readable form of the type at an index.
-  std::string PrettyType(uint32_t type_idx) const;
+  std::string PrettyType(dex::TypeIndex type_idx) const;
 
  private:
   static std::unique_ptr<const DexFile> OpenFile(int fd,
@@ -1138,9 +1146,10 @@
   // If this dex file was loaded from an oat file, oat_dex_file_ contains a
   // pointer to the OatDexFile it was loaded from. Otherwise oat_dex_file_ is
   // null.
-  const OatDexFile* oat_dex_file_;
+  mutable const OatDexFile* oat_dex_file_;
 
   friend class DexFileVerifierTest;
+  friend class OatWriter;
   ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for constructor
 };
 
@@ -1165,11 +1174,11 @@
   bool HasNext() const { return pos_ < size_; }
   size_t Size() const { return size_; }
   void Next() { ++pos_; }
-  uint16_t GetTypeIdx() {
+  dex::TypeIndex GetTypeIdx() {
     return type_list_->GetTypeItem(pos_).type_idx_;
   }
   const char* GetDescriptor() {
-    return dex_file_.StringByTypeIdx(GetTypeIdx());
+    return dex_file_.StringByTypeIdx(dex::TypeIndex(GetTypeIdx()));
   }
  private:
   const DexFile& dex_file_;
@@ -1455,7 +1464,7 @@
       Init(handler_data);
     }
 
-    uint16_t GetHandlerTypeIndex() const {
+    dex::TypeIndex GetHandlerTypeIndex() const {
       return handler_.type_idx_;
     }
     uint32_t GetHandlerAddress() const {
@@ -1476,7 +1485,7 @@
     void Init(const uint8_t* handler_data);
 
     struct CatchHandlerItem {
-      uint16_t type_idx_;  // type index of the caught exception type
+      dex::TypeIndex type_idx_;  // type index of the caught exception type
       uint32_t address_;  // handler address
     } handler_;
     const uint8_t* current_data_;  // the current handler in dex file.
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 0765465..52b9f11 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -22,6 +22,7 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
+#include "jni_internal.h"
 #include "jvalue-inl.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
@@ -89,7 +90,7 @@
     const uint8_t* annotation = annotation_item->annotation_;
     uint32_t type_index = DecodeUnsignedLeb128(&annotation);
 
-    if (strcmp(descriptor, dex_file.StringByTypeIdx(type_index)) == 0) {
+    if (strcmp(descriptor, dex_file.StringByTypeIdx(dex::TypeIndex(type_index))) == 0) {
       result = annotation_item;
       break;
     }
@@ -166,7 +167,8 @@
 
   while (size != 0) {
     uint32_t element_name_index = DecodeUnsignedLeb128(&annotation);
-    const char* element_name = dex_file.GetStringData(dex_file.GetStringId(element_name_index));
+    const char* element_name =
+        dex_file.GetStringData(dex_file.GetStringId(dex::StringIndex(element_name_index)));
     if (strcmp(name, element_name) == 0) {
       return annotation;
     }
@@ -245,7 +247,7 @@
   StackHandleScope<2> hs(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Handle<mirror::Class> annotation_class(hs.NewHandle(
-      class_linker->ResolveType(klass->GetDexFile(), type_index, klass.Get())));
+      class_linker->ResolveType(klass->GetDexFile(), dex::TypeIndex(type_index), klass.Get())));
   if (annotation_class.Get() == nullptr) {
     LOG(INFO) << "Unable to resolve " << klass->PrettyClass() << " annotation class " << type_index;
     DCHECK(Thread::Current()->IsExceptionPending());
@@ -281,7 +283,7 @@
 
   JValue result;
   ArtMethod* create_annotation_method =
-      soa.DecodeMethod(WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation);
+      jni::DecodeArtMethod(WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation);
   uint32_t args[2] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(annotation_class.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_element_array.Get())) };
   create_annotation_method->Invoke(self, args, sizeof(args), &result, "LLL");
@@ -356,7 +358,7 @@
         StackHandleScope<1> hs(self);
         Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         element_object = Runtime::Current()->GetClassLinker()->ResolveString(
-            klass->GetDexFile(), index, dex_cache);
+            klass->GetDexFile(), dex::StringIndex(index), dex_cache);
         set_object = true;
         if (element_object == nullptr) {
           return false;
@@ -369,13 +371,14 @@
       if (result_style == DexFile::kAllRaw) {
         annotation_value->value_.SetI(index);
       } else {
+        dex::TypeIndex type_index(index);
         element_object = Runtime::Current()->GetClassLinker()->ResolveType(
-            klass->GetDexFile(), index, klass.Get());
+            klass->GetDexFile(), type_index, klass.Get());
         set_object = true;
         if (element_object == nullptr) {
           CHECK(self->IsExceptionPending());
           if (result_style == DexFile::kAllObjects) {
-            const char* msg = dex_file.StringByTypeIdx(index);
+            const char* msg = dex_file.StringByTypeIdx(type_index);
             self->ThrowNewWrappedException("Ljava/lang/TypeNotPresentException;", msg);
             element_object = self->GetException();
             self->ClearException();
@@ -590,7 +593,7 @@
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<5> hs(self);
   uint32_t element_name_index = DecodeUnsignedLeb128(annotation);
-  const char* name = dex_file.StringDataByIdx(element_name_index);
+  const char* name = dex_file.StringDataByIdx(dex::StringIndex(element_name_index));
   Handle<mirror::String> string_name(
       hs.NewHandle(mirror::String::AllocFromModifiedUtf8(self, name)));
 
@@ -633,7 +636,7 @@
 
   JValue result;
   ArtMethod* annotation_member_init =
-      soa.DecodeMethod(WellKnownClasses::libcore_reflect_AnnotationMember_init);
+      jni::DecodeArtMethod(WellKnownClasses::libcore_reflect_AnnotationMember_init);
   uint32_t args[5] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(new_member.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(string_name.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(value_object.Get())),
@@ -664,7 +667,7 @@
     const uint8_t* annotation = annotation_item->annotation_;
     uint32_t type_index = DecodeUnsignedLeb128(&annotation);
     mirror::Class* resolved_class = Runtime::Current()->GetClassLinker()->ResolveType(
-        klass->GetDexFile(), type_index, klass.Get());
+        klass->GetDexFile(), dex::TypeIndex(type_index), klass.Get());
     if (resolved_class == nullptr) {
       std::string temp;
       LOG(WARNING) << StringPrintf("Unable to resolve %s annotation class %d",
@@ -1339,12 +1342,16 @@
     case kDouble:  field->SetDouble<kTransactionActive>(field->GetDeclaringClass(), jval_.d); break;
     case kNull:    field->SetObject<kTransactionActive>(field->GetDeclaringClass(), nullptr); break;
     case kString: {
-      mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, *dex_cache_);
+      mirror::String* resolved = linker_->ResolveString(dex_file_,
+                                                        dex::StringIndex(jval_.i),
+                                                        *dex_cache_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
       break;
     }
     case kType: {
-      mirror::Class* resolved = linker_->ResolveType(dex_file_, jval_.i, *dex_cache_,
+      mirror::Class* resolved = linker_->ResolveType(dex_file_,
+                                                     dex::TypeIndex(jval_.i),
+                                                     *dex_cache_,
                                                      *class_loader_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
       break;
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 8e1501f..0fec856 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -415,14 +415,14 @@
 
 TEST_F(DexFileTest, FindTypeId) {
   for (size_t i = 0; i < java_lang_dex_file_->NumTypeIds(); i++) {
-    const char* type_str = java_lang_dex_file_->StringByTypeIdx(i);
+    const char* type_str = java_lang_dex_file_->StringByTypeIdx(dex::TypeIndex(i));
     const DexFile::StringId* type_str_id = java_lang_dex_file_->FindStringId(type_str);
     ASSERT_TRUE(type_str_id != nullptr);
-    uint32_t type_str_idx = java_lang_dex_file_->GetIndexForStringId(*type_str_id);
+    dex::StringIndex type_str_idx = java_lang_dex_file_->GetIndexForStringId(*type_str_id);
     const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId(type_str_idx);
     ASSERT_EQ(type_id, java_lang_dex_file_->FindTypeId(type_str));
     ASSERT_TRUE(type_id != nullptr);
-    EXPECT_EQ(java_lang_dex_file_->GetIndexForTypeId(*type_id), i);
+    EXPECT_EQ(java_lang_dex_file_->GetIndexForTypeId(*type_id).index_, i);
   }
 }
 
@@ -430,7 +430,7 @@
   for (size_t i = 0; i < java_lang_dex_file_->NumProtoIds(); i++) {
     const DexFile::ProtoId& to_find = java_lang_dex_file_->GetProtoId(i);
     const DexFile::TypeList* to_find_tl = java_lang_dex_file_->GetProtoParameters(to_find);
-    std::vector<uint16_t> to_find_types;
+    std::vector<dex::TypeIndex> to_find_types;
     if (to_find_tl != nullptr) {
       for (size_t j = 0; j < to_find_tl->Size(); j++) {
         to_find_types.push_back(to_find_tl->GetTypeItem(j).type_idx_);
diff --git a/runtime/dex_file_types.h b/runtime/dex_file_types.h
new file mode 100644
index 0000000..bd779c4
--- /dev/null
+++ b/runtime/dex_file_types.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_FILE_TYPES_H_
+#define ART_RUNTIME_DEX_FILE_TYPES_H_
+
+#include <stdint.h>
+
+#include <functional>
+#include <limits>
+#include <ostream>
+
+namespace art {
+namespace dex {
+
+// Strongly typed index into a dex file's string_ids. The explicit constructor keeps raw
+// integers from converting implicitly; the maximum uint32_t value is reserved as "invalid".
+class StringIndex {
+ public:
+  uint32_t index_;
+
+  // Default construction yields the invalid index.
+  constexpr StringIndex() : index_(std::numeric_limits<decltype(index_)>::max()) {}
+  explicit constexpr StringIndex(uint32_t idx) : index_(idx) {}
+
+  bool IsValid() const {
+    return index_ != std::numeric_limits<decltype(index_)>::max();
+  }
+  static StringIndex Invalid() {
+    return StringIndex(std::numeric_limits<decltype(index_)>::max());
+  }
+
+  // Comparisons order indexes by their raw value.
+  bool operator==(const StringIndex& other) const {
+    return index_ == other.index_;
+  }
+  bool operator!=(const StringIndex& other) const {
+    return index_ != other.index_;
+  }
+  bool operator<(const StringIndex& other) const {
+    return index_ < other.index_;
+  }
+  bool operator<=(const StringIndex& other) const {
+    return index_ <= other.index_;
+  }
+  bool operator>(const StringIndex& other) const {
+    return index_ > other.index_;
+  }
+  bool operator>=(const StringIndex& other) const {
+    return index_ >= other.index_;
+  }
+};
+// Streams the index as "StringIndex[<raw value>]".
+std::ostream& operator<<(std::ostream& os, const StringIndex& index);
+
+// Strongly typed index into a dex file's type_ids. The explicit constructor keeps raw
+// integers from converting implicitly; the maximum uint16_t value is reserved as "invalid".
+class TypeIndex {
+ public:
+  uint16_t index_;
+
+  // Default construction yields the invalid index.
+  constexpr TypeIndex() : index_(std::numeric_limits<decltype(index_)>::max()) {}
+  explicit constexpr TypeIndex(uint16_t idx) : index_(idx) {}
+
+  bool IsValid() const {
+    return index_ != std::numeric_limits<decltype(index_)>::max();
+  }
+  static TypeIndex Invalid() {
+    return TypeIndex(std::numeric_limits<decltype(index_)>::max());
+  }
+
+  // Comparisons order indexes by their raw value.
+  bool operator==(const TypeIndex& other) const {
+    return index_ == other.index_;
+  }
+  bool operator!=(const TypeIndex& other) const {
+    return index_ != other.index_;
+  }
+  bool operator<(const TypeIndex& other) const {
+    return index_ < other.index_;
+  }
+  bool operator<=(const TypeIndex& other) const {
+    return index_ <= other.index_;
+  }
+  bool operator>(const TypeIndex& other) const {
+    return index_ > other.index_;
+  }
+  bool operator>=(const TypeIndex& other) const {
+    return index_ >= other.index_;
+  }
+};
+// Streams the index as "TypeIndex[<raw value>]".
+std::ostream& operator<<(std::ostream& os, const TypeIndex& index);
+
+}  // namespace dex
+}  // namespace art
+
+namespace std {
+
+// std::hash specializations for the index wrappers; each hashes the raw index value.
+template<> struct hash<art::dex::StringIndex> {
+  size_t operator()(const art::dex::StringIndex& index) const {
+    return hash<uint32_t>()(index.index_);
+  }
+};
+
+template<> struct hash<art::dex::TypeIndex> {
+  size_t operator()(const art::dex::TypeIndex& index) const {
+    return hash<uint16_t>()(index.index_);
+  }
+};
+
+}  // namespace std
+
+#endif  // ART_RUNTIME_DEX_FILE_TYPES_H_
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 7b1fb95..07f0fca 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -19,6 +19,7 @@
 #include <inttypes.h>
 #include <zlib.h>
 
+#include <limits>
 #include <memory>
 
 #include "base/stringprintf.h"
@@ -31,6 +32,16 @@
 
 namespace art {
 
+static constexpr uint32_t kTypeIdLimit = std::numeric_limits<uint16_t>::max();
+
+static bool IsValidOrNoTypeId(uint16_t low, uint16_t high) {
+  return (high == 0) || ((high == 0xffffU) && (low == 0xffffU));
+}
+
+static bool IsValidTypeId(uint16_t low ATTRIBUTE_UNUSED, uint16_t high) {
+  return (high == 0);
+}
+
 static uint32_t MapTypeToBitMask(uint32_t map_type) {
   switch (map_type) {
     case DexFile::kDexTypeHeaderItem:               return 1 << 0;
@@ -69,20 +80,19 @@
   return true;
 }
 
-const char* DexFileVerifier::CheckLoadStringByIdx(uint32_t idx, const char* error_string) {
-  if (UNLIKELY(!CheckIndex(idx, dex_file_->NumStringIds(), error_string))) {
+const char* DexFileVerifier::CheckLoadStringByIdx(dex::StringIndex idx, const char* error_string) {
+  if (UNLIKELY(!CheckIndex(idx.index_, dex_file_->NumStringIds(), error_string))) {
     return nullptr;
   }
   return dex_file_->StringDataByIdx(idx);
 }
 
-const char* DexFileVerifier::CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_string) {
-  if (UNLIKELY(!CheckIndex(type_idx, dex_file_->NumTypeIds(), error_string))) {
+const char* DexFileVerifier::CheckLoadStringByTypeIdx(dex::TypeIndex type_idx,
+                                                      const char* error_string) {
+  if (UNLIKELY(!CheckIndex(type_idx.index_, dex_file_->NumTypeIds(), error_string))) {
     return nullptr;
   }
-  const DexFile::TypeId& type_id = dex_file_->GetTypeId(type_idx);
-  uint32_t idx = type_id.descriptor_idx_;
-  return CheckLoadStringByIdx(idx, error_string);
+  return CheckLoadStringByIdx(dex_file_->GetTypeId(type_idx).descriptor_idx_, error_string);
 }
 
 const DexFile::FieldId* DexFileVerifier::CheckLoadFieldId(uint32_t idx, const char* error_string) {
@@ -456,22 +466,22 @@
 
 #define DECODE_UNSIGNED_CHECKED_FROM_WITH_ERROR_VALUE(ptr, var, error_value)  \
   uint32_t var;                                                               \
-  if (!DecodeUnsignedLeb128Checked(&ptr, begin_ + size_, &var)) {             \
+  if (!DecodeUnsignedLeb128Checked(&(ptr), begin_ + size_, &(var))) {         \
     return error_value;                                                       \
   }
 
-#define DECODE_UNSIGNED_CHECKED_FROM(ptr, var)                      \
-  uint32_t var;                                                     \
-  if (!DecodeUnsignedLeb128Checked(&ptr, begin_ + size_, &var)) {   \
-    ErrorStringPrintf("Read out of bounds");                        \
-    return false;                                                   \
+#define DECODE_UNSIGNED_CHECKED_FROM(ptr, var)                        \
+  uint32_t var;                                                       \
+  if (!DecodeUnsignedLeb128Checked(&(ptr), begin_ + size_, &(var))) { \
+    ErrorStringPrintf("Read out of bounds");                          \
+    return false;                                                     \
   }
 
-#define DECODE_SIGNED_CHECKED_FROM(ptr, var)                      \
-  int32_t var;                                                    \
-  if (!DecodeSignedLeb128Checked(&ptr, begin_ + size_, &var)) {   \
-    ErrorStringPrintf("Read out of bounds");                      \
-    return false;                                                 \
+#define DECODE_SIGNED_CHECKED_FROM(ptr, var)                        \
+  int32_t var;                                                      \
+  if (!DecodeSignedLeb128Checked(&(ptr), begin_ + size_, &(var))) { \
+    ErrorStringPrintf("Read out of bounds");                        \
+    return false;                                                   \
   }
 
 bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_item,
@@ -525,7 +535,7 @@
 bool DexFileVerifier::CheckClassDataItemField(uint32_t idx,
                                               uint32_t access_flags,
                                               uint32_t class_access_flags,
-                                              uint16_t class_type_index,
+                                              dex::TypeIndex class_type_index,
                                               bool expect_static) {
   // Check for overflow.
   if (!CheckIndex(idx, header_->field_ids_size_, "class_data_item field_idx")) {
@@ -533,13 +543,13 @@
   }
 
   // Check that it's the right class.
-  uint16_t my_class_index =
+  dex::TypeIndex my_class_index =
       (reinterpret_cast<const DexFile::FieldId*>(begin_ + header_->field_ids_off_) + idx)->
           class_idx_;
   if (class_type_index != my_class_index) {
     ErrorStringPrintf("Field's class index unexpected, %" PRIu16 "vs %" PRIu16,
-                      my_class_index,
-                      class_type_index);
+                      my_class_index.index_,
+                      class_type_index.index_);
     return false;
   }
 
@@ -563,7 +573,7 @@
 bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx,
                                                uint32_t access_flags,
                                                uint32_t class_access_flags,
-                                               uint16_t class_type_index,
+                                               dex::TypeIndex class_type_index,
                                                uint32_t code_offset,
                                                std::unordered_set<uint32_t>* direct_method_indexes,
                                                bool expect_direct) {
@@ -574,13 +584,13 @@
   }
 
   // Check that it's the right class.
-  uint16_t my_class_index =
+  dex::TypeIndex my_class_index =
       (reinterpret_cast<const DexFile::MethodId*>(begin_ + header_->method_ids_off_) + idx)->
           class_idx_;
   if (class_type_index != my_class_index) {
-    ErrorStringPrintf("Method's class index unexpected, %" PRIu16 "vs %" PRIu16,
-                      my_class_index,
-                      class_type_index);
+    ErrorStringPrintf("Method's class index unexpected, %" PRIu16 " vs %" PRIu16,
+                      my_class_index.index_,
+                      class_type_index.index_);
     return false;
   }
 
@@ -789,7 +799,7 @@
 
 bool DexFileVerifier::FindClassFlags(uint32_t index,
                                      bool is_field,
-                                     uint16_t* class_type_index,
+                                     dex::TypeIndex* class_type_index,
                                      uint32_t* class_access_flags) {
   DCHECK(class_type_index != nullptr);
   DCHECK(class_access_flags != nullptr);
@@ -811,7 +821,7 @@
   }
 
   // Check if that is valid.
-  if (*class_type_index >= header_->type_ids_size_) {
+  if (class_type_index->index_ >= header_->type_ids_size_) {
     return false;
   }
 
@@ -836,7 +846,7 @@
                                                  uint32_t curr_index,
                                                  uint32_t prev_index,
                                                  bool* have_class,
-                                                 uint16_t* class_type_index,
+                                                 dex::TypeIndex* class_type_index,
                                                  uint32_t* class_access_flags) {
   if (curr_index < prev_index) {
     ErrorStringPrintf("out-of-order %s indexes %" PRIu32 " and %" PRIu32,
@@ -862,7 +872,7 @@
 template <bool kStatic>
 bool DexFileVerifier::CheckIntraClassDataItemFields(ClassDataItemIterator* it,
                                                     bool* have_class,
-                                                    uint16_t* class_type_index,
+                                                    dex::TypeIndex* class_type_index,
                                                     uint32_t* class_access_flags) {
   DCHECK(it != nullptr);
   // These calls use the raw access flags to check whether the whole dex field is valid.
@@ -897,7 +907,7 @@
     ClassDataItemIterator* it,
     std::unordered_set<uint32_t>* direct_method_indexes,
     bool* have_class,
-    uint16_t* class_type_index,
+    dex::TypeIndex* class_type_index,
     uint32_t* class_access_flags) {
   uint32_t prev_index = 0;
   for (; kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod(); it->Next()) {
@@ -935,7 +945,7 @@
   // So we need to explicitly search with the first item we find (either field or method), and then,
   // as the lookup is expensive, cache the result.
   bool have_class = false;
-  uint16_t class_type_index;
+  dex::TypeIndex class_type_index;
   uint32_t class_access_flags;
 
   // Check fields.
@@ -1682,26 +1692,27 @@
   return true;
 }
 
-uint16_t DexFileVerifier::FindFirstClassDataDefiner(const uint8_t* ptr, bool* success) {
+dex::TypeIndex DexFileVerifier::FindFirstClassDataDefiner(const uint8_t* ptr, bool* success) {
   ClassDataItemIterator it(*dex_file_, ptr);
   *success = true;
 
   if (it.HasNextStaticField() || it.HasNextInstanceField()) {
     LOAD_FIELD(field, it.GetMemberIndex(), "first_class_data_definer field_id",
-               *success = false; return DexFile::kDexNoIndex16)
+               *success = false; return dex::TypeIndex(DexFile::kDexNoIndex16))
     return field->class_idx_;
   }
 
   if (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
     LOAD_METHOD(method, it.GetMemberIndex(), "first_class_data_definer method_id",
-                *success = false; return DexFile::kDexNoIndex16)
+                *success = false; return dex::TypeIndex(DexFile::kDexNoIndex16))
     return method->class_idx_;
   }
 
-  return DexFile::kDexNoIndex16;
+  return dex::TypeIndex(DexFile::kDexNoIndex16);
 }
 
-uint16_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const uint8_t* ptr, bool* success) {
+dex::TypeIndex DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const uint8_t* ptr,
+                                                                     bool* success) {
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr);
   *success = true;
@@ -1709,25 +1720,25 @@
   if (item->fields_size_ != 0) {
     DexFile::FieldAnnotationsItem* field_items = (DexFile::FieldAnnotationsItem*) (item + 1);
     LOAD_FIELD(field, field_items[0].field_idx_, "first_annotations_dir_definer field_id",
-               *success = false; return DexFile::kDexNoIndex16)
+               *success = false; return dex::TypeIndex(DexFile::kDexNoIndex16))
     return field->class_idx_;
   }
 
   if (item->methods_size_ != 0) {
     DexFile::MethodAnnotationsItem* method_items = (DexFile::MethodAnnotationsItem*) (item + 1);
     LOAD_METHOD(method, method_items[0].method_idx_, "first_annotations_dir_definer method id",
-                *success = false; return DexFile::kDexNoIndex16)
+                *success = false; return dex::TypeIndex(DexFile::kDexNoIndex16))
     return method->class_idx_;
   }
 
   if (item->parameters_size_ != 0) {
     DexFile::ParameterAnnotationsItem* parameter_items = (DexFile::ParameterAnnotationsItem*) (item + 1);
     LOAD_METHOD(method, parameter_items[0].method_idx_, "first_annotations_dir_definer method id",
-                *success = false; return DexFile::kDexNoIndex16)
+                *success = false; return dex::TypeIndex(DexFile::kDexNoIndex16))
     return method->class_idx_;
   }
 
-  return DexFile::kDexNoIndex16;
+  return dex::TypeIndex(DexFile::kDexNoIndex16);
 }
 
 bool DexFileVerifier::CheckInterStringIdItem() {
@@ -1769,7 +1780,8 @@
     const DexFile::TypeId* prev_item = reinterpret_cast<const DexFile::TypeId*>(previous_item_);
     if (UNLIKELY(prev_item->descriptor_idx_ >= item->descriptor_idx_)) {
       ErrorStringPrintf("Out-of-order type_ids: %x then %x",
-                        prev_item->descriptor_idx_, item->descriptor_idx_);
+                        prev_item->descriptor_idx_.index_,
+                        item->descriptor_idx_.index_);
       return false;
     }
   }
@@ -1788,6 +1800,12 @@
     return false;
   }
 
+  // Check that the return type is representable as a uint16_t.
+  if (UNLIKELY(!IsValidOrNoTypeId(item->return_type_idx_.index_, item->pad_))) {
+    ErrorStringPrintf("proto with return type idx outside uint16_t range '%x:%x'",
+                      item->pad_, item->return_type_idx_.index_);
+    return false;
+  }
   // Check the return type and advance the shorty.
   LOAD_STRING_BY_TYPE(return_type, item->return_type_idx_, "inter_proto_id_item return_type_idx")
   if (!CheckShortyDescriptorMatch(*shorty, return_type, true)) {
@@ -1797,7 +1815,8 @@
 
   DexFileParameterIterator it(*dex_file_, *item);
   while (it.HasNext() && *shorty != '\0') {
-    if (!CheckIndex(it.GetTypeIdx(), dex_file_->NumTypeIds(),
+    if (!CheckIndex(it.GetTypeIdx().index_,
+                    dex_file_->NumTypeIds(),
                     "inter_proto_id_item shorty type_idx")) {
       return false;
     }
@@ -1824,10 +1843,10 @@
       DexFileParameterIterator prev_it(*dex_file_, *prev);
 
       while (curr_it.HasNext() && prev_it.HasNext()) {
-        uint16_t prev_idx = prev_it.GetTypeIdx();
-        uint16_t curr_idx = curr_it.GetTypeIdx();
-        DCHECK_NE(prev_idx, DexFile::kDexNoIndex16);
-        DCHECK_NE(curr_idx, DexFile::kDexNoIndex16);
+        dex::TypeIndex prev_idx = prev_it.GetTypeIdx();
+        dex::TypeIndex curr_idx = curr_it.GetTypeIdx();
+        DCHECK_NE(prev_idx, dex::TypeIndex(DexFile::kDexNoIndex16));
+        DCHECK_NE(curr_idx, dex::TypeIndex(DexFile::kDexNoIndex16));
 
         if (prev_idx < curr_idx) {
           break;
@@ -1949,9 +1968,21 @@
 bool DexFileVerifier::CheckInterClassDefItem() {
   const DexFile::ClassDef* item = reinterpret_cast<const DexFile::ClassDef*>(ptr_);
 
+  // Check that class_idx_ is representable as a uint16_t.
+  if (UNLIKELY(!IsValidTypeId(item->class_idx_.index_, item->pad1_))) {
+    ErrorStringPrintf("class with type idx outside uint16_t range '%x:%x'", item->pad1_,
+                      item->class_idx_.index_);
+    return false;
+  }
+  // Check that superclass_idx_ is representable as a uint16_t.
+  if (UNLIKELY(!IsValidOrNoTypeId(item->superclass_idx_.index_, item->pad2_))) {
+    ErrorStringPrintf("class with superclass type idx outside uint16_t range '%x:%x'", item->pad2_,
+                      item->superclass_idx_.index_);
+    return false;
+  }
   // Check for duplicate class def.
   if (defined_classes_.find(item->class_idx_) != defined_classes_.end()) {
-    ErrorStringPrintf("Redefinition of class with type idx: '%d'", item->class_idx_);
+    ErrorStringPrintf("Redefinition of class with type idx: '%d'", item->class_idx_.index_);
     return false;
   }
   defined_classes_.insert(item->class_idx_);
@@ -1985,12 +2016,13 @@
     return false;
   }
 
-  if (item->superclass_idx_ != DexFile::kDexNoIndex16) {
+  if (item->superclass_idx_.IsValid()) {
     if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
       // Check that a class does not inherit from itself directly (by having
       // the same type idx as its super class).
       if (UNLIKELY(item->superclass_idx_ == item->class_idx_)) {
-        ErrorStringPrintf("Class with same type idx as its superclass: '%d'", item->class_idx_);
+        ErrorStringPrintf("Class with same type idx as its superclass: '%d'",
+                          item->class_idx_.index_);
         return false;
       }
 
@@ -2004,8 +2036,8 @@
           ErrorStringPrintf("Invalid class definition ordering:"
                             " class with type idx: '%d' defined before"
                             " superclass with type idx: '%d'",
-                            item->class_idx_,
-                            item->superclass_idx_);
+                            item->class_idx_.index_,
+                            item->superclass_idx_.index_);
           return false;
         }
       }
@@ -2029,7 +2061,7 @@
         // same type idx as one of its immediate implemented interfaces).
         if (UNLIKELY(interfaces->GetTypeItem(i).type_idx_ == item->class_idx_)) {
           ErrorStringPrintf("Class with same type idx as implemented interface: '%d'",
-                            item->class_idx_);
+                            item->class_idx_.index_);
           return false;
         }
 
@@ -2044,8 +2076,8 @@
             ErrorStringPrintf("Invalid class definition ordering:"
                               " class with type idx: '%d' defined before"
                               " implemented interface with type idx: '%d'",
-                              item->class_idx_,
-                              interfaces->GetTypeItem(i).type_idx_);
+                              item->class_idx_.index_,
+                              interfaces->GetTypeItem(i).type_idx_.index_);
             return false;
           }
         }
@@ -2065,9 +2097,9 @@
      * practice the number of interfaces implemented by any given class is low.
      */
     for (uint32_t i = 1; i < size; i++) {
-      uint32_t idx1 = interfaces->GetTypeItem(i).type_idx_;
+      dex::TypeIndex idx1 = interfaces->GetTypeItem(i).type_idx_;
       for (uint32_t j =0; j < i; j++) {
-        uint32_t idx2 = interfaces->GetTypeItem(j).type_idx_;
+        dex::TypeIndex idx2 = interfaces->GetTypeItem(j).type_idx_;
         if (UNLIKELY(idx1 == idx2)) {
           ErrorStringPrintf("Duplicate interface: '%s'", dex_file_->StringByTypeIdx(idx1));
           return false;
@@ -2080,11 +2112,12 @@
   if (item->class_data_off_ != 0) {
     const uint8_t* data = begin_ + item->class_data_off_;
     bool success;
-    uint16_t data_definer = FindFirstClassDataDefiner(data, &success);
+    dex::TypeIndex data_definer = FindFirstClassDataDefiner(data, &success);
     if (!success) {
       return false;
     }
-    if (UNLIKELY((data_definer != item->class_idx_) && (data_definer != DexFile::kDexNoIndex16))) {
+    if (UNLIKELY((data_definer != item->class_idx_) &&
+                 (data_definer != dex::TypeIndex(DexFile::kDexNoIndex16)))) {
       ErrorStringPrintf("Invalid class_data_item");
       return false;
     }
@@ -2099,12 +2132,12 @@
     }
     const uint8_t* data = begin_ + item->annotations_off_;
     bool success;
-    uint16_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data, &success);
+    dex::TypeIndex annotations_definer = FindFirstAnnotationsDirectoryDefiner(data, &success);
     if (!success) {
       return false;
     }
     if (UNLIKELY((annotations_definer != item->class_idx_) &&
-                 (annotations_definer != DexFile::kDexNoIndex16))) {
+                 (annotations_definer != dex::TypeIndex(DexFile::kDexNoIndex16)))) {
       ErrorStringPrintf("Invalid annotations_directory_item");
       return false;
     }
@@ -2165,7 +2198,7 @@
 bool DexFileVerifier::CheckInterClassDataItem() {
   ClassDataItemIterator it(*dex_file_, ptr_);
   bool success;
-  uint16_t defining_class = FindFirstClassDataDefiner(ptr_, &success);
+  dex::TypeIndex defining_class = FindFirstClassDataDefiner(ptr_, &success);
   if (!success) {
     return false;
   }
@@ -2197,7 +2230,7 @@
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr_);
   bool success;
-  uint16_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_, &success);
+  dex::TypeIndex defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_, &success);
   if (!success) {
     return false;
   }
@@ -2315,6 +2348,14 @@
         break;
       }
       case DexFile::kDexTypeClassDefItem: {
+        // There shouldn't be more class definitions than type ids allow.
+        // This check should be redundant, since there are checks that the
+        // class_idx_ is within range and that there is only one definition
+        // for a given type id.
+        if (i > kTypeIdLimit) {
+          ErrorStringPrintf("Too many class definition items");
+          return false;
+        }
         if (!CheckInterClassDefItem()) {
           return false;
         }
@@ -2333,6 +2374,14 @@
         break;
       }
       case DexFile::kDexTypeClassDataItem: {
+        // There shouldn't be more class data than type ids allow.
+        // This check should be redundant, since there are checks that the
+        // class_idx_ is within range and that there is only one definition
+        // for a given type id.
+        if (i > kTypeIdLimit) {
+          ErrorStringPrintf("Too many class data items");
+          return false;
+        }
         if (!CheckInterClassDataItem()) {
           return false;
         }
@@ -2450,14 +2499,15 @@
 
 static std::string GetStringOrError(const uint8_t* const begin,
                                     const DexFile::Header* const header,
-                                    uint32_t string_idx) {
+                                    dex::StringIndex string_idx) {
   // The `string_idx` is not guaranteed to be valid yet.
-  if (header->string_ids_size_ <= string_idx) {
+  if (header->string_ids_size_ <= string_idx.index_) {
     return "(error)";
   }
 
   const DexFile::StringId* string_id =
-      reinterpret_cast<const DexFile::StringId*>(begin + header->string_ids_off_) + string_idx;
+      reinterpret_cast<const DexFile::StringId*>(begin + header->string_ids_off_)
+          + string_idx.index_;
 
   // Assume that the data is OK at this point. String data has been checked at this point.
 
@@ -2471,15 +2521,15 @@
 
 static std::string GetClassOrError(const uint8_t* const begin,
                                    const DexFile::Header* const header,
-                                   uint32_t class_idx) {
+                                   dex::TypeIndex class_idx) {
   // The `class_idx` is either `FieldId::class_idx_` or `MethodId::class_idx_` and
   // it has already been checked in `DexFileVerifier::CheckClassDataItemField()`
   // or `DexFileVerifier::CheckClassDataItemMethod()`, respectively, to match
   // a valid defining class.
-  CHECK_LT(class_idx, header->type_ids_size_);
+  CHECK_LT(class_idx.index_, header->type_ids_size_);
 
   const DexFile::TypeId* type_id =
-      reinterpret_cast<const DexFile::TypeId*>(begin + header->type_ids_off_) + class_idx;
+      reinterpret_cast<const DexFile::TypeId*>(begin + header->type_ids_off_) + class_idx.index_;
 
   // Assume that the data is OK at this point. Type id offsets have been checked at this point.
 
@@ -2614,7 +2664,7 @@
   }
   uint32_t string_idx =
       (reinterpret_cast<const DexFile::MethodId*>(begin + header->method_ids_off_) +
-          method_index)->name_idx_;
+          method_index)->name_idx_.index_;
   if (string_idx >= header->string_ids_size_) {
     *error_msg = "String index not available for method flags verification";
     return false;
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 133e432..0327367 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -20,6 +20,7 @@
 #include <unordered_set>
 
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "safe_map.h"
 
 namespace art {
@@ -76,12 +77,12 @@
   bool CheckClassDataItemField(uint32_t idx,
                                uint32_t access_flags,
                                uint32_t class_access_flags,
-                               uint16_t class_type_index,
+                               dex::TypeIndex class_type_index,
                                bool expect_static);
   bool CheckClassDataItemMethod(uint32_t idx,
                                 uint32_t access_flags,
                                 uint32_t class_access_flags,
-                                uint16_t class_type_index,
+                                dex::TypeIndex class_type_index,
                                 uint32_t code_offset,
                                 std::unordered_set<uint32_t>* direct_method_indexes,
                                 bool expect_direct);
@@ -90,7 +91,7 @@
                                   uint32_t curr_index,
                                   uint32_t prev_index,
                                   bool* have_class,
-                                  uint16_t* class_type_index,
+                                  dex::TypeIndex* class_type_index,
                                   uint32_t* class_access_flags);
 
   bool CheckPadding(size_t offset, uint32_t aligned_offset);
@@ -104,7 +105,7 @@
   template <bool kStatic>
   bool CheckIntraClassDataItemFields(ClassDataItemIterator* it,
                                      bool* have_class,
-                                     uint16_t* class_type_index,
+                                     dex::TypeIndex* class_type_index,
                                      uint32_t* class_access_flags);
   // Check all methods of the given type from the given iterator. Load the class data from the first
   // method, if necessary (and return it), or use the given values.
@@ -112,7 +113,7 @@
   bool CheckIntraClassDataItemMethods(ClassDataItemIterator* it,
                                       std::unordered_set<uint32_t>* direct_method_indexes,
                                       bool* have_class,
-                                      uint16_t* class_type_index,
+                                      dex::TypeIndex* class_type_index,
                                       uint32_t* class_access_flags);
 
   bool CheckIntraCodeItem();
@@ -130,8 +131,8 @@
 
   // Note: as sometimes kDexNoIndex16, being 0xFFFF, is a valid return value, we need an
   // additional out parameter to signal any errors loading an index.
-  uint16_t FindFirstClassDataDefiner(const uint8_t* ptr, bool* success);
-  uint16_t FindFirstAnnotationsDirectoryDefiner(const uint8_t* ptr, bool* success);
+  dex::TypeIndex FindFirstClassDataDefiner(const uint8_t* ptr, bool* success);
+  dex::TypeIndex FindFirstAnnotationsDirectoryDefiner(const uint8_t* ptr, bool* success);
 
   bool CheckInterStringIdItem();
   bool CheckInterTypeIdItem();
@@ -149,8 +150,8 @@
 
   // Load a string by (type) index. Checks whether the index is in bounds, printing the error if
   // not. If there is an error, null is returned.
-  const char* CheckLoadStringByIdx(uint32_t idx, const char* error_fmt);
-  const char* CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_fmt);
+  const char* CheckLoadStringByIdx(dex::StringIndex idx, const char* error_fmt);
+  const char* CheckLoadStringByTypeIdx(dex::TypeIndex type_idx, const char* error_fmt);
 
   // Load a field/method Id by index. Checks whether the index is in bounds, printing the error if
   // not. If there is an error, null is returned.
@@ -168,7 +169,7 @@
   // linear search. The output values should thus be cached by the caller.
   bool FindClassFlags(uint32_t index,
                       bool is_field,
-                      uint16_t* class_type_index,
+                      dex::TypeIndex* class_type_index,
                       uint32_t* class_access_flags);
 
   // Check validity of the given access flags, interpreted for a field in the context of a class
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index e392870..f14b1d5 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -26,6 +26,7 @@
 #include "base/macros.h"
 #include "common_runtime_test.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "leb128.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
@@ -58,7 +59,7 @@
 
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
-                          std::function<void(DexFile*)> f,
+                          const std::function<void(DexFile*)>& f,
                           const char* expected_error) {
     size_t length;
     std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(dex_file_base64_content, &length));
@@ -155,7 +156,7 @@
       "method_id_class_idx",
       [](DexFile* dex_file) {
         DexFile::MethodId* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(0));
-        method_id->class_idx_ = 0xFF;
+        method_id->class_idx_ = dex::TypeIndex(0xFF);
       },
       "could not find declaring class for direct method index 0");
 
@@ -175,7 +176,7 @@
       "method_id_name_idx",
       [](DexFile* dex_file) {
         DexFile::MethodId* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(0));
-        method_id->name_idx_ = 0xFF;
+        method_id->name_idx_ = dex::StringIndex(0xFF);
       },
       "String index not available for method flags verification");
 }
@@ -246,7 +247,7 @@
 
   while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
     uint32_t method_index = it.GetMemberIndex();
-    uint32_t name_index = dex_file->GetMethodId(method_index).name_idx_;
+    dex::StringIndex name_index = dex_file->GetMethodId(method_index).name_idx_;
     const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
     const char* str = dex_file->GetStringData(string_id);
     if (strcmp(name, str) == 0) {
@@ -634,7 +635,7 @@
         uint32_t method_idx;
         FindMethodData(dex_file, "foo", &method_idx);
         auto* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(method_idx));
-        method_id->name_idx_ = dex_file->NumStringIds();
+        method_id->name_idx_ = dex::StringIndex(dex_file->NumStringIds());
       },
       "Method may have only one of public/protected/private, LMethodFlags;.(error)");
 }
@@ -855,7 +856,7 @@
 
   while (it.HasNextStaticField() || it.HasNextInstanceField()) {
     uint32_t field_index = it.GetMemberIndex();
-    uint32_t name_index = dex_file->GetFieldId(field_index).name_idx_;
+    dex::StringIndex name_index = dex_file->GetFieldId(field_index).name_idx_;
     const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
     const char* str = dex_file->GetStringData(string_id);
     if (strcmp(name, str) == 0) {
@@ -1450,12 +1451,12 @@
             // Swap the proto parameters and shorties to break the ordering.
             std::swap(const_cast<uint32_t&>(proto1.parameters_off_),
                       const_cast<uint32_t&>(proto2.parameters_off_));
-            std::swap(const_cast<uint32_t&>(proto1.shorty_idx_),
-                      const_cast<uint32_t&>(proto2.shorty_idx_));
+            std::swap(const_cast<dex::StringIndex&>(proto1.shorty_idx_),
+                      const_cast<dex::StringIndex&>(proto2.shorty_idx_));
           } else {
             // Copy the proto parameters and shorty to create duplicate proto id.
             const_cast<uint32_t&>(proto1.parameters_off_) = proto2.parameters_off_;
-            const_cast<uint32_t&>(proto1.shorty_idx_) = proto2.shorty_idx_;
+            const_cast<dex::StringIndex&>(proto1.shorty_idx_) = proto2.shorty_idx_;
           }
         },
         "Out-of-order proto_id arguments");
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index c766b54..9902389 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -191,10 +191,11 @@
           if (file != nullptr) {
             uint32_t string_idx = VRegB_21c();
             if (string_idx < file->NumStringIds()) {
-              os << StringPrintf("const-string v%d, %s // string@%d",
-                                 VRegA_21c(),
-                                 PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                                 string_idx);
+              os << StringPrintf(
+                  "const-string v%d, %s // string@%d",
+                  VRegA_21c(),
+                  PrintableString(file->StringDataByIdx(dex::StringIndex(string_idx))).c_str(),
+                  string_idx);
             } else {
               os << StringPrintf("const-string v%d, <<invalid-string-idx-%d>> // string@%d",
                                  VRegA_21c(),
@@ -208,9 +209,9 @@
         case CONST_CLASS:
         case NEW_INSTANCE:
           if (file != nullptr) {
-            uint32_t type_idx = VRegB_21c();
-            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << file->PrettyType(type_idx)
-               << " // type@" << type_idx;
+            dex::TypeIndex type_idx(VRegB_21c());  // Print the raw index_ after "type@".
+            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", "
+               << file->PrettyType(type_idx) << " // type@" << type_idx.index_;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -302,17 +303,19 @@
           FALLTHROUGH_INTENDED;
         case INSTANCE_OF:
           if (file != nullptr) {
-            uint32_t type_idx = VRegC_22c();
-            os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v" << static_cast<int>(VRegB_22c()) << ", "
-               << file->PrettyType(type_idx) << " // type@" << type_idx;
+            dex::TypeIndex type_idx(VRegC_22c());
+            os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v"
+               << static_cast<int>(VRegB_22c()) << ", " << file->PrettyType(type_idx)
+               << " // type@" << type_idx.index_;
             break;
           }
           FALLTHROUGH_INTENDED;
         case NEW_ARRAY:
           if (file != nullptr) {
-            uint32_t type_idx = VRegC_22c();
-            os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v" << static_cast<int>(VRegB_22c()) << ", "
-               << file->PrettyType(type_idx) << " // type@" << type_idx;
+            dex::TypeIndex type_idx(VRegC_22c());
+            os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v"
+               << static_cast<int>(VRegB_22c()) << ", " << file->PrettyType(type_idx)
+               << " // type@" << type_idx.index_;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -331,11 +334,12 @@
         uint32_t string_idx = VRegB_31c();
         if (file != nullptr) {
           if (string_idx < file->NumStringIds()) {
-            os << StringPrintf("%s v%d, %s // string@%d",
-                               opcode,
-                               VRegA_31c(),
-                               PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                               string_idx);
+            os << StringPrintf(
+                "%s v%d, %s // string@%d",
+                opcode,
+                VRegA_31c(),
+                PrintableString(file->StringDataByIdx(dex::StringIndex(string_idx))).c_str(),
+                string_idx);
           } else {
             os << StringPrintf("%s v%d, <<invalid-string-idx-%d>> // string@%d",
                                opcode,
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 8eb1a79..578550c 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -189,6 +189,7 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
+    kVerifyRegHPrototype      = 0x800000
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
@@ -480,6 +481,18 @@
     insns[1] = val;
   }
 
+  void SetVRegA_21c(uint8_t val) {  // Stores val in the upper 8 bits of insns[0].
+    DCHECK(FormatOf(Opcode()) == k21c);  // Asserts the opcode's format is k21c.
+    uint16_t* insns = reinterpret_cast<uint16_t*>(this);  // Raw 16-bit view of this.
+    insns[0] = (val << 8) | (insns[0] & 0x00ff);  // Lower 8 bits are kept as-is.
+  }
+
+  void SetVRegB_21c(uint16_t val) {  // Overwrites insns[1] with val.
+    DCHECK(FormatOf(Opcode()) == k21c);  // Asserts the opcode's format is k21c.
+    uint16_t* insns = reinterpret_cast<uint16_t*>(this);  // Raw 16-bit view of this.
+    insns[1] = val;
+  }
+
   // Returns the format of the given opcode.
   static Format FormatOf(Code opcode) {
     return kInstructionFormats[opcode];
@@ -567,6 +580,10 @@
         kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
+  int GetVerifyTypeArgumentH() const {  // kVerifyRegHPrototype if set, else 0.
+    return (kInstructionVerifyFlags[Opcode()] & kVerifyRegHPrototype);
+  }
+
   int GetVerifyExtraFlags() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget |
         kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgNonZero | kVerifyVarArgRange |
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index e537afe..ca2ce1d 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -269,8 +269,8 @@
   V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kExperimental) \
-  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kExperimental) \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kVerifyRegHPrototype) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \
   V(0xFC, UNUSED_FC, "unused-fc", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFD, UNUSED_FD, "unused-fd", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ed60f59..f6eeffc 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -129,7 +129,7 @@
 
 template <const bool kAccessCheck>
 ALWAYS_INLINE
-inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx,
                                        ArtMethod* method,
                                        Thread* self,
                                        bool* slow_path) {
@@ -219,7 +219,7 @@
 // check.
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE
-inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx,
                                            ArtMethod* method,
                                            Thread* self,
                                            gc::AllocatorType allocator_type) {
@@ -275,7 +275,7 @@
 
 template <bool kAccessCheck>
 ALWAYS_INLINE
-inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+inline mirror::Class* CheckArrayAlloc(dex::TypeIndex type_idx,
                                       int32_t component_count,
                                       ArtMethod* method,
                                       bool* slow_path) {
@@ -313,7 +313,7 @@
 // check.
 template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE
-inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+inline mirror::Array* AllocArrayFromCode(dex::TypeIndex type_idx,
                                          int32_t component_count,
                                          ArtMethod* method,
                                          Thread* self,
@@ -562,7 +562,7 @@
       StackHandleScope<2> hs2(self);
       HandleWrapperObjPtr<mirror::Object> h_this(hs2.NewHandleWrapper(this_object));
       Handle<mirror::Class> h_referring_class(hs2.NewHandle(referrer->GetDeclaringClass()));
-      const uint16_t method_type_idx =
+      const dex::TypeIndex method_type_idx =
           h_referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
       mirror::Class* method_reference_class = class_linker->ResolveType(method_type_idx, referrer);
       if (UNLIKELY(method_reference_class == nullptr)) {
@@ -758,7 +758,8 @@
     return resolved_method;
   } else if (type == kSuper) {
     // TODO This lookup is rather slow.
-    uint16_t method_type_idx = referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
+    dex::TypeIndex method_type_idx =
+        referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
     mirror::Class* method_reference_class =
         referring_class->GetDexCache()->GetResolvedType(method_type_idx);
     if (method_reference_class == nullptr) {
@@ -788,8 +789,11 @@
   }
 }
 
-inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx, ArtMethod* referrer, Thread* self,
-                                             bool can_run_clinit, bool verify_access) {
+inline mirror::Class* ResolveVerifyAndClinit(dex::TypeIndex type_idx,
+                                             ArtMethod* referrer,
+                                             Thread* self,
+                                             bool can_run_clinit,
+                                             bool verify_access) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
   if (UNLIKELY(klass == nullptr)) {
@@ -822,7 +826,7 @@
   return h_class.Get();
 }
 
-inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, uint32_t string_idx) {
+inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   return class_linker->ResolveString(string_idx, referrer);
 }
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 1ccb4b0..5390165 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -38,7 +38,7 @@
 
 namespace art {
 
-static inline mirror::Class* CheckFilledNewArrayAlloc(uint32_t type_idx,
+static inline mirror::Class* CheckFilledNewArrayAlloc(dex::TypeIndex type_idx,
                                                       int32_t component_count,
                                                       ArtMethod* referrer,
                                                       Thread* self,
@@ -82,10 +82,12 @@
 }
 
 // Helper function to allocate array for FILLED_NEW_ARRAY.
-mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, int32_t component_count,
-                                          ArtMethod* referrer, Thread* self,
+mirror::Array* CheckAndAllocArrayFromCode(dex::TypeIndex type_idx,
+                                          int32_t component_count,
+                                          ArtMethod* referrer,
+                                          Thread* self,
                                           bool access_check,
-                                          gc::AllocatorType /* allocator_type */) {
+                                          gc::AllocatorType allocator_type ATTRIBUTE_UNUSED) {
   mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, component_count, referrer, self,
                                                   access_check);
   if (UNLIKELY(klass == nullptr)) {
@@ -101,12 +103,13 @@
 }
 
 // Helper function to allocate array for FILLED_NEW_ARRAY.
-mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
-                                                      int32_t component_count,
-                                                      ArtMethod* referrer,
-                                                      Thread* self,
-                                                      bool access_check,
-                                                      gc::AllocatorType /* allocator_type */) {
+mirror::Array* CheckAndAllocArrayFromCodeInstrumented(
+    dex::TypeIndex type_idx,
+    int32_t component_count,
+    ArtMethod* referrer,
+    Thread* self,
+    bool access_check,
+    gc::AllocatorType allocator_type ATTRIBUTE_UNUSED) {
   mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, component_count, referrer, self,
                                                   access_check);
   if (UNLIKELY(klass == nullptr)) {
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index bcddfb0..7cc136e 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -23,6 +23,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "dex_instruction.h"
+#include "dex_file_types.h"
 #include "gc/allocator_type.h"
 #include "handle.h"
 #include "invoke_type.h"
@@ -45,7 +46,7 @@
 class Thread;
 
 template <const bool kAccessCheck>
-ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx,
                                                      ArtMethod* method,
                                                      Thread* self,
                                                      bool* slow_path)
@@ -63,7 +64,7 @@
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
 template <bool kAccessCheck, bool kInstrumented>
-ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx,
                                                          ArtMethod* method,
                                                          Thread* self,
                                                          gc::AllocatorType allocator_type)
@@ -89,7 +90,7 @@
 
 
 template <bool kAccessCheck>
-ALWAYS_INLINE inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+ALWAYS_INLINE inline mirror::Class* CheckArrayAlloc(dex::TypeIndex type_idx,
                                                     int32_t component_count,
                                                     ArtMethod* method,
                                                     bool* slow_path)
@@ -101,7 +102,7 @@
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
 template <bool kAccessCheck, bool kInstrumented>
-ALWAYS_INLINE inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+ALWAYS_INLINE inline mirror::Array* AllocArrayFromCode(dex::TypeIndex type_idx,
                                                        int32_t component_count,
                                                        ArtMethod* method,
                                                        Thread* self,
@@ -118,19 +119,21 @@
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
-extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, int32_t component_count,
-                                                 ArtMethod* method, Thread* self,
-                                                 bool access_check,
-                                                 gc::AllocatorType allocator_type)
+mirror::Array* CheckAndAllocArrayFromCode(dex::TypeIndex type_idx,
+                                          int32_t component_count,
+                                          ArtMethod* method,
+                                          Thread* self,
+                                          bool access_check,
+                                          gc::AllocatorType allocator_type)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
-extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
-                                                             int32_t component_count,
-                                                             ArtMethod* method,
-                                                             Thread* self,
-                                                             bool access_check,
-                                                             gc::AllocatorType allocator_type)
+mirror::Array* CheckAndAllocArrayFromCodeInstrumented(dex::TypeIndex type_idx,
+                                                      int32_t component_count,
+                                                      ArtMethod* method,
+                                                      Thread* self,
+                                                      bool access_check,
+                                                      gc::AllocatorType allocator_type)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
@@ -177,7 +180,7 @@
                                  InvokeType type)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
+inline mirror::Class* ResolveVerifyAndClinit(dex::TypeIndex type_idx,
                                              ArtMethod* referrer,
                                              Thread* self,
                                              bool can_run_clinit,
@@ -185,7 +188,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
-inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, uint32_t string_idx)
+inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index dc5fd07..82bb8e5 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -19,6 +19,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "callee_save_frame.h"
+#include "dex_file_types.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
@@ -34,7 +35,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
   if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
-    mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, kRuntimePointerSize); \
+    mirror::Class* klass = method->GetDexCacheResolvedType<false>(dex::TypeIndex(type_idx), \
+                                                                  kRuntimePointerSize); \
     if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -43,18 +45,18 @@
         obj = self->AllocTlab(byte_count); \
         DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
         obj->SetClass(klass); \
-        if (kUseBakerOrBrooksReadBarrier) { \
-          if (kUseBrooksReadBarrier) { \
-            obj->SetReadBarrierPointer(obj); \
-          } \
-          obj->AssertReadBarrierPointer(); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
         } \
         QuasiAtomic::ThreadFenceForConstructor(); \
         return obj; \
       } \
     } \
   } \
-  return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
+  return AllocObjectFromCode<false, instrumented_bool>(dex::TypeIndex(type_idx), \
+                                                       method, \
+                                                       self, \
+                                                       allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
@@ -69,11 +71,8 @@
         obj = self->AllocTlab(byte_count); \
         DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
         obj->SetClass(klass); \
-        if (kUseBakerOrBrooksReadBarrier) { \
-          if (kUseBrooksReadBarrier) { \
-            obj->SetReadBarrierPointer(obj); \
-          } \
-          obj->AssertReadBarrierPointer(); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
         } \
         QuasiAtomic::ThreadFenceForConstructor(); \
         return obj; \
@@ -94,11 +93,8 @@
       obj = self->AllocTlab(byte_count); \
       DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
       obj->SetClass(klass); \
-      if (kUseBakerOrBrooksReadBarrier) { \
-        if (kUseBrooksReadBarrier) { \
-          obj->SetReadBarrierPointer(obj); \
-        } \
-        obj->AssertReadBarrierPointer(); \
+      if (kUseBakerReadBarrier) { \
+        obj->AssertReadBarrierState(); \
       } \
       QuasiAtomic::ThreadFenceForConstructor(); \
       return obj; \
@@ -110,13 +106,19 @@
     uint32_t type_idx, ArtMethod* method, Thread* self) \
     REQUIRES_SHARED(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  return AllocObjectFromCode<true, instrumented_bool>(type_idx, method, self, allocator_type); \
+  return AllocObjectFromCode<true, instrumented_bool>(dex::TypeIndex(type_idx), \
+                                                      method, \
+                                                      self, \
+                                                      allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     REQUIRES_SHARED(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  return AllocArrayFromCode<false, instrumented_bool>(type_idx, component_count, method, self, \
+  return AllocArrayFromCode<false, instrumented_bool>(dex::TypeIndex(type_idx), \
+                                                      component_count, \
+                                                      method, \
+                                                      self, \
                                                       allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCodeResolved##suffix##suffix2( \
@@ -130,7 +132,10 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     REQUIRES_SHARED(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  return AllocArrayFromCode<true, instrumented_bool>(type_idx, component_count, method, self, \
+  return AllocArrayFromCode<true, instrumented_bool>(dex::TypeIndex(type_idx), \
+                                                     component_count, \
+                                                     method, \
+                                                     self, \
                                                      allocator_type); \
 } \
 extern "C" mirror::Array* artCheckAndAllocArrayFromCode##suffix##suffix2( \
@@ -138,9 +143,19 @@
     REQUIRES_SHARED(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
   if (!(instrumented_bool)) { \
-    return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, false, allocator_type); \
+    return CheckAndAllocArrayFromCode(dex::TypeIndex(type_idx), \
+                                      component_count, \
+                                      method, \
+                                      self, \
+                                      false, \
+                                      allocator_type); \
   } else { \
-    return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, false, allocator_type); \
+    return CheckAndAllocArrayFromCodeInstrumented(dex::TypeIndex(type_idx), \
+                                                  component_count, \
+                                                  method, \
+                                                  self, \
+                                                  false, \
+                                                  allocator_type); \
   } \
 } \
 extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
@@ -148,9 +163,19 @@
     REQUIRES_SHARED(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
   if (!(instrumented_bool)) { \
-    return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, true, allocator_type); \
+    return CheckAndAllocArrayFromCode(dex::TypeIndex(type_idx), \
+                                      component_count, \
+                                      method, \
+                                      self, \
+                                      true, \
+                                      allocator_type); \
   } else { \
-    return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
+    return CheckAndAllocArrayFromCodeInstrumented(dex::TypeIndex(type_idx), \
+                                                  component_count, \
+                                                  method, \
+                                                  self, \
+                                                  true, \
+                                                  allocator_type); \
   } \
 } \
 extern "C" mirror::String* artAllocStringFromBytesFromCode##suffix##suffix2( \
@@ -267,7 +292,7 @@
   entry_points_instrumented = instrumented;
 }
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
 #if !defined(__APPLE__) || !defined(__LP64__)
   switch (entry_points_allocator) {
     case gc::kAllocatorTypeDlMalloc: {
@@ -295,7 +320,12 @@
     }
     case gc::kAllocatorTypeRegionTLAB: {
       CHECK(kMovingCollector);
-      SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      if (is_marking) {
+        SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      } else {
+        // Not marking means we need no read barriers and can just use the normal TLAB case.
+        SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
+      }
       return;
     }
     default:
@@ -303,6 +333,7 @@
   }
 #else
   UNUSED(qpoints);
+  UNUSED(is_marking);
 #endif
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
index 14a8e04..bd1e295 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
@@ -23,7 +23,9 @@
 
 namespace art {
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+// is_marking is only used for CC: if the GC is marking, the region TLAB allocation entrypoints
+// are installed; otherwise the normal TLAB entrypoints are installed.
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking);
 
 // Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is
 // starting it doesn't hold the mutator lock until after it has been added to the thread list.
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 2732d68..083d578 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -27,4 +27,12 @@
   return klass->IsAssignableFrom(ref_class) ? 1 : 0;
 }
 
+// Instance-of test for code, won't throw.  Null and equality test already performed.
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
+  DCHECK(ref_class != nullptr);
+  return obj->InstanceOf(ref_class) ? 1 : 0;  // 1 if obj is an instance of ref_class, else 0.
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index cfa5325..64030f3 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -31,7 +31,7 @@
 // These are extern declarations of assembly stubs with common names.
 
 // Cast entrypoints.
-extern "C" void art_quick_check_cast(const art::mirror::Class*, const art::mirror::Class*);
+extern "C" void art_quick_check_instance_of(art::mirror::Object*, art::mirror::Class*);
 
 // DexCache entrypoints.
 extern "C" void* art_quick_initialize_static_storage(uint32_t);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index df23f94..78dad94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -31,7 +31,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true);
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index d438418..5dad43e 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -20,6 +20,7 @@
 #include "class_linker-inl.h"
 #include "class_table-inl.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -37,7 +38,7 @@
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
   auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
-  return ResolveVerifyAndClinit(type_idx, caller, self, true, false);
+  return ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx, Thread* self)
@@ -45,7 +46,7 @@
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
   auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
-  return ResolveVerifyAndClinit(type_idx, caller, self, false, false);
+  return ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Thread* self)
@@ -54,7 +55,7 @@
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
   auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
-  return ResolveVerifyAndClinit(type_idx, caller, self, false, true);
+  return ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
@@ -65,7 +66,7 @@
       // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything.
       (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly
                                                        : Runtime::kSaveEverything);
-  mirror::String* result = ResolveStringFromCode(caller, string_idx);
+  mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
   if (LIKELY(result != nullptr)) {
     // For AOT code, we need a write barrier for the class loader that holds
     // the GC roots in the .bss.
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 3cfee45..a1c5082 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,8 +33,8 @@
   V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
   V(AllocStringFromString, void*, void*) \
 \
-  V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
-  V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
+  V(InstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*) \
+  V(CheckInstanceOf, void, mirror::Object*, mirror::Class*) \
 \
   V(InitializeStaticStorage, void*, uint32_t) \
   V(InitializeTypeAndVerifyAccess, void*, uint32_t) \
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index a205b17..c8ee99a 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -111,6 +111,14 @@
   self->QuickDeliverException();
 }
 
+extern "C" NO_RETURN void artThrowClassCastExceptionForObject(mirror::Object* obj,
+                                                              mirror::Class* dest_type,
+                                                              Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);  // obj is dereferenced below to obtain its class.
+  artThrowClassCastException(dest_type, obj->GetClass(), self);  // Forwards obj's class.
+}
+
 extern "C" NO_RETURN void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
                                                       Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index cdb1051..1283660 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -94,8 +94,8 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, opeer, jpeer, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, jpeer, stack_begin, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stack_begin, stack_size, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stack_size, stack_trace_sample, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stack_trace_sample, wait_next, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stack_size, deps_or_stack_trace_sample, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deps_or_stack_trace_sample, wait_next, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, wait_next, monitor_enter_object, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, monitor_enter_object, top_handle_scope, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, top_handle_scope, class_loader_override, sizeof(void*));
@@ -174,8 +174,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromString, pInstanceofNonTrivial,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckCast, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckCast, pInitializeStaticStorage, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckInstanceOf, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckInstanceOf, pInitializeStaticStorage,
+                         sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeStaticStorage, pInitializeTypeAndVerifyAccess,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeTypeAndVerifyAccess, pInitializeType,
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index d921900..e18a955 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -181,7 +181,6 @@
 }
 
 void AllocRecordObjectMap::BroadcastForNewAllocationRecords() {
-  CHECK(kUseReadBarrier);
   new_record_condition_.Broadcast(Thread::Current());
 }
 
@@ -291,6 +290,9 @@
   // Wait for GC's sweeping to complete and allow new records
   while (UNLIKELY((!kUseReadBarrier && !allow_new_record_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     new_record_condition_.WaitHoldingLocks(self);
   }
 
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index c8b2b89..90cff6a 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -261,7 +261,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::alloc_tracker_lock_);
   void BroadcastForNewAllocationRecords()
-      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::alloc_tracker_lock_);
 
   // TODO: Is there a better way to hide the entries_'s type?
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 76f500c..7c64952 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -32,7 +32,7 @@
     mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
   // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
   // to gray even though the object has already been marked through. This happens if a mutator
-  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // thread gets preempted before the AtomicSetReadBarrierState below, GC marks through the
   // object (changes it from white to gray and back to white), and the thread runs and
   // incorrectly changes it from white to gray. If this happens, the object will get added to the
   // mark stack again and get changed back to white after it is processed.
@@ -50,14 +50,14 @@
     // we can avoid an expensive CAS.
     // For the baker case, an object is marked if either the mark bit marked or the bitmap bit is
     // set.
-    success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+    success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState());
   } else {
     success = !bitmap->AtomicTestAndSet(ref);
   }
   if (success) {
     // Newly marked.
     if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+      DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::GrayState());
     }
     PushOntoMarkStack(ref);
   }
@@ -84,8 +84,8 @@
       return ref;
     }
     // This may or may not succeed, which is ok because the object may already be gray.
-    bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                    ReadBarrier::GrayPtr());
+    bool success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(),
+                                                  ReadBarrier::GrayState());
     if (success) {
       MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
       immune_gray_stack_.push_back(ref);
@@ -125,10 +125,6 @@
       return from_ref;
     case space::RegionSpace::RegionType::kRegionTypeFromSpace: {
       mirror::Object* to_ref = GetFwdPtr(from_ref);
-      if (kUseBakerReadBarrier) {
-        DCHECK_NE(to_ref, ReadBarrier::GrayPtr())
-            << "from_ref=" << from_ref << " to_ref=" << to_ref;
-      }
       if (to_ref == nullptr) {
         // It isn't marked yet. Mark it by copying it to the to-space.
         to_ref = Copy(from_ref);
@@ -153,9 +149,7 @@
 
 inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
   mirror::Object* ret;
-  // TODO: Delete GetMarkBit check when all of the callers properly check the bit. Remaining caller
-  // is array allocations.
-  if (from_ref == nullptr || from_ref->GetMarkBit()) {
+  if (from_ref == nullptr) {
     return from_ref;
   }
   // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
@@ -192,9 +186,9 @@
 
 inline bool ConcurrentCopying::IsMarkedInUnevacFromSpace(mirror::Object* from_ref) {
   // Use load acquire on the read barrier pointer to ensure that we never see a white read barrier
-  // pointer with an unmarked bit due to reordering.
+  // state with an unmarked bit due to reordering.
   DCHECK(region_space_->IsInUnevacFromSpace(from_ref));
-  if (kUseBakerReadBarrier && from_ref->GetReadBarrierPointerAcquire() == ReadBarrier::GrayPtr()) {
+  if (kUseBakerReadBarrier && from_ref->GetReadBarrierStateAcquire() == ReadBarrier::GrayState()) {
     return true;
   }
   return region_space_bitmap_->Test(from_ref);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 6dfab8b..19ee0fb 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -246,7 +246,7 @@
     Thread* self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->SetIsGcMarking(true);
+    thread->SetIsGcMarkingAndUpdateEntrypoints(true);
     if (use_tlab_ && thread->HasTlab()) {
       if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
         // This must come before the revoke.
@@ -418,7 +418,7 @@
                                   [&visitor](mirror::Object* obj)
         REQUIRES_SHARED(Locks::mutator_lock_) {
       // If an object is not gray, it should only have references to things in the immune spaces.
-      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+      if (obj->GetReadBarrierState() != ReadBarrier::GrayState()) {
         obj->VisitReferences</*kVisitNativeRoots*/true,
                              kDefaultVerifyFlags,
                              kWithoutReadBarrier>(visitor, visitor);
@@ -463,7 +463,7 @@
       if (kIsDebugBuild) {
         Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
       }
-      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+      obj->SetReadBarrierState(ReadBarrier::GrayState());
     }
   }
 
@@ -514,26 +514,6 @@
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
 }
 
-class EmptyCheckpoint : public Closure {
- public:
-  explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying)
-      : concurrent_copying_(concurrent_copying) {
-  }
-
-  virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
-    // Note: self is not necessarily equal to thread since thread may be suspended.
-    Thread* self = Thread::Current();
-    CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
-        << thread->GetState() << " thread " << thread << " self " << self;
-    // If thread is a running mutator, then act on behalf of the garbage collector.
-    // See the code in ThreadList::RunCheckpoint.
-    concurrent_copying_->GetBarrier().Pass(self);
-  }
-
- private:
-  ConcurrentCopying* const concurrent_copying_;
-};
-
 // Used to visit objects in the immune spaces.
 inline void ConcurrentCopying::ScanImmuneObject(mirror::Object* obj) {
   DCHECK(obj != nullptr);
@@ -549,11 +529,11 @@
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
         collector_->ScanImmuneObject(obj);
         // Done scanning the object, go back to white.
-        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                        ReadBarrier::WhitePtr());
+        bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                      ReadBarrier::WhiteState());
         CHECK(success);
       }
     } else {
@@ -620,9 +600,9 @@
       LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size();
     }
     for (mirror::Object* obj : immune_gray_stack_) {
-      DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                      ReadBarrier::WhitePtr());
+      DCHECK(obj->GetReadBarrierState() == ReadBarrier::GrayState());
+      bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                    ReadBarrier::WhiteState());
       DCHECK(success);
     }
     immune_gray_stack_.clear();
@@ -746,7 +726,7 @@
     // Disable the thread-local is_gc_marking flag.
     // Note a thread that has just started right before this checkpoint may have already this flag
     // set to false, which is ok.
-    thread->SetIsGcMarking(false);
+    thread->SetIsGcMarkingAndUpdateEntrypoints(false);
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
     concurrent_copying_->GetBarrier().Pass(self);
@@ -821,11 +801,11 @@
   for (mirror::Object* obj : false_gray_stack_) {
     DCHECK(IsMarked(obj));
     // The object could be white here if a thread got preempted after a success at the
-    // AtomicSetReadBarrierPointer in Mark(), GC started marking through it (but not finished so
+    // AtomicSetReadBarrierState in Mark(), GC started marking through it (but not finished so
     // still gray), and the thread ran to register it onto the false gray stack.
-    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                      ReadBarrier::WhitePtr());
+    if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
+      bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                    ReadBarrier::WhiteState());
       DCHECK(success);
     }
   }
@@ -835,10 +815,10 @@
 
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
-  EmptyCheckpoint check_point(this);
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  gc_barrier_->Init(self, 0);
-  size_t barrier_count = thread_list->RunCheckpoint(&check_point);
+  Barrier* barrier = thread_list->EmptyCheckpointBarrier();
+  barrier->Init(self, 0);
+  size_t barrier_count = thread_list->RunEmptyCheckpoint();
   // If there are no threads to wait which implys that all the checkpoint functions are finished,
   // then no need to release the mutator lock.
   if (barrier_count == 0) {
@@ -848,7 +828,7 @@
   Locks::mutator_lock_->SharedUnlock(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-    gc_barrier_->Increment(self, barrier_count);
+    barrier->Increment(self, barrier_count);
   }
   Locks::mutator_lock_->SharedLock(self);
 }
@@ -955,9 +935,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+      CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState())
           << "Ref " << ref << " " << ref->PrettyTypeOf()
-          << " has non-white rb_ptr ";
+          << " has non-white rb_state ";
     }
   }
 
@@ -1026,8 +1006,8 @@
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
+      CHECK_EQ(obj->GetReadBarrierState(), ReadBarrier::WhiteState())
+          << "obj=" << obj << " non-white rb_state " << obj->GetReadBarrierState();
     }
   }
 
@@ -1253,6 +1233,10 @@
     }
     gc_mark_stack_->Reset();
   } else if (mark_stack_mode == kMarkStackModeShared) {
+    // Do an empty checkpoint to avoid a race with a mutator preempted in the middle of a read
+    // barrier but before pushing onto the mark stack. b/32508093. Note the weak ref access is
+    // disabled at this point.
+    IssueEmptyCheckpoint();
     // Process the shared GC mark stack with a lock.
     {
       MutexLock mu(self, mark_stack_lock_);
@@ -1333,8 +1317,8 @@
 inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   if (kUseBakerReadBarrier) {
-    DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-        << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+    DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState())
+        << " " << to_ref << " " << to_ref->GetReadBarrierState()
         << " is_marked=" << IsMarked(to_ref);
   }
   bool add_to_live_bytes = false;
@@ -1351,8 +1335,8 @@
     Scan(to_ref);
   }
   if (kUseBakerReadBarrier) {
-    DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-        << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+    DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState())
+        << " " << to_ref << " " << to_ref->GetReadBarrierState()
         << " is_marked=" << IsMarked(to_ref);
   }
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -1368,9 +1352,9 @@
     // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
     // else block.
     if (kUseBakerReadBarrier) {
-      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-          ReadBarrier::GrayPtr(),
-          ReadBarrier::WhitePtr());
+      bool success = to_ref->AtomicSetReadBarrierState</*kCasRelease*/true>(
+          ReadBarrier::GrayState(),
+          ReadBarrier::WhiteState());
       DCHECK(success) << "Must succeed as we won the race.";
     }
   }
@@ -1458,9 +1442,9 @@
         while (!mark_stack->IsEmpty()) {
           mirror::Object* obj = mark_stack->PopBack();
           if (kUseBakerReadBarrier) {
-            mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
-            LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf() << " rb_ptr="
-                      << rb_ptr << " is_marked=" << IsMarked(obj);
+            uint32_t rb_state = obj->GetReadBarrierState();
+            LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf() << " rb_state="
+                      << rb_state << " is_marked=" << IsMarked(obj);
           } else {
             LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf()
                       << " is_marked=" << IsMarked(obj);
@@ -1707,7 +1691,7 @@
 void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset) {
   if (kUseBakerReadBarrier) {
     LOG(INFO) << "holder=" << obj << " " << obj->PrettyTypeOf()
-              << " holder rb_ptr=" << obj->GetReadBarrierPointer();
+              << " holder rb_state=" << obj->GetReadBarrierState();
   } else {
     LOG(INFO) << "holder=" << obj << " " << obj->PrettyTypeOf();
   }
@@ -1762,10 +1746,10 @@
         return;
       }
       bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
-      CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-          << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
-          << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr)
-          << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer()
+      CHECK(updated_all_immune_objects || ref->GetReadBarrierState() == ReadBarrier::GrayState())
+          << "Unmarked immune space ref. obj=" << obj << " rb_state="
+          << (obj != nullptr ? obj->GetReadBarrierState() : 0U)
+          << " ref=" << ref << " ref rb_state=" << ref->GetReadBarrierState()
           << " updated_all_immune_objects=" << updated_all_immune_objects;
     }
   } else {
@@ -2158,17 +2142,21 @@
     to_ref->SetLockWord(old_lock_word, false);
     // Set the gray ptr.
     if (kUseBakerReadBarrier) {
-      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+      to_ref->SetReadBarrierState(ReadBarrier::GrayState());
     }
 
+    // Do a fence to prevent the field CAS in ConcurrentCopying::Process from possibly reordering
+    // before the object copy.
+    QuasiAtomic::ThreadFenceRelease();
+
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
 
     // Try to atomically write the fwd ptr.
-    bool success = from_ref->CasLockWordWeakSequentiallyConsistent(old_lock_word, new_lock_word);
+    bool success = from_ref->CasLockWordWeakRelaxed(old_lock_word, new_lock_word);
     if (LIKELY(success)) {
       // The CAS succeeded.
-      objects_moved_.FetchAndAddSequentiallyConsistent(1);
-      bytes_moved_.FetchAndAddSequentiallyConsistent(region_space_alloc_size);
+      objects_moved_.FetchAndAddRelaxed(1);
+      bytes_moved_.FetchAndAddRelaxed(region_space_alloc_size);
       if (LIKELY(!fall_back_to_non_moving)) {
         DCHECK(region_space_->IsInToSpace(to_ref));
       } else {
@@ -2176,7 +2164,7 @@
         DCHECK_EQ(bytes_allocated, non_moving_space_bytes_allocated);
       }
       if (kUseBakerReadBarrier) {
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+        DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState());
       }
       DCHECK(GetFwdPtr(from_ref) == to_ref);
       CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress);
@@ -2262,14 +2250,14 @@
   if (!is_los && mark_bitmap->Test(ref)) {
     // Already marked.
     if (kUseBakerReadBarrier) {
-      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+      DCHECK(ref->GetReadBarrierState() == ReadBarrier::GrayState() ||
+             ref->GetReadBarrierState() == ReadBarrier::WhiteState());
     }
   } else if (is_los && los_bitmap->Test(ref)) {
     // Already marked in LOS.
     if (kUseBakerReadBarrier) {
-      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+      DCHECK(ref->GetReadBarrierState() == ReadBarrier::GrayState() ||
+             ref->GetReadBarrierState() == ReadBarrier::WhiteState());
     }
   } else {
     // Not marked.
@@ -2282,7 +2270,7 @@
         DCHECK(!los_bitmap->Test(ref));
       }
       if (kUseBakerReadBarrier) {
-        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+        DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState());
       }
     } else {
       // For the baker-style RB, we need to handle 'false-gray' cases. See the
@@ -2298,25 +2286,25 @@
       // This may or may not succeed, which is ok.
       bool cas_success = false;
       if (kUseBakerReadBarrier) {
-        cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                       ReadBarrier::GrayPtr());
+        cas_success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(),
+                                                     ReadBarrier::GrayState());
       }
       if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
         // Already marked.
         if (kUseBakerReadBarrier && cas_success &&
-            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            ref->GetReadBarrierState() == ReadBarrier::GrayState()) {
           PushOntoFalseGrayStack(ref);
         }
       } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
         // Already marked in LOS.
         if (kUseBakerReadBarrier && cas_success &&
-            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            ref->GetReadBarrierState() == ReadBarrier::GrayState()) {
           PushOntoFalseGrayStack(ref);
         }
       } else {
         // Newly marked.
         if (kUseBakerReadBarrier) {
-          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+          DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::GrayState());
         }
         PushOntoMarkStack(ref);
       }
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index e0bf744..ddcb6c0 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -124,9 +124,9 @@
   if (obj == nullptr) {
     return nullptr;
   }
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct forward pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct read barrier state installed.
+    obj->AssertReadBarrierState();
   }
   if (!immune_spaces_.IsInImmuneRegion(obj)) {
     if (objects_before_forwarding_->HasAddress(obj)) {
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 77d7274..06ed029 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -413,42 +413,48 @@
     if (UNLIKELY(obj == nullptr || !IsAligned<kPageSize>(obj) ||
                  (kIsDebugBuild && large_object_space != nullptr &&
                      !large_object_space->Contains(obj)))) {
-      LOG(FATAL_WITHOUT_ABORT) << "Tried to mark " << obj << " not contained by any spaces";
+      // Lowest priority logging first:
+      PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
+      MemMap::DumpMaps(LOG_STREAM(FATAL_WITHOUT_ABORT), true);
+      // Buffer the output in the string stream since it is more important than the stack traces
+      // and we want it to have log priority. The stack traces are printed from Runtime::Abort
+      // which is called from LOG(FATAL) but before the abort message.
+      std::ostringstream oss;
+      oss << "Tried to mark " << obj << " not contained by any spaces" << std::endl;
       if (holder_ != nullptr) {
         size_t holder_size = holder_->SizeOf();
         ArtField* field = holder_->FindFieldByOffset(offset_);
-        LOG(FATAL_WITHOUT_ABORT) << "Field info: "
-                            << " holder=" << holder_
-                            << " holder is "
-                            << (mark_sweep_->GetHeap()->IsLiveObjectLocked(holder_)
-                                ? "alive" : "dead")
-                            << " holder_size=" << holder_size
-                            << " holder_type=" << holder_->PrettyTypeOf()
-                            << " offset=" << offset_.Uint32Value()
-                            << " field=" << (field != nullptr ? field->GetName() : "nullptr")
-                            << " field_type="
-                            << (field != nullptr ? field->GetTypeDescriptor() : "")
-                            << " first_ref_field_offset="
-                            << (holder_->IsClass()
-                                ? holder_->AsClass()->GetFirstReferenceStaticFieldOffset(
-                                    kRuntimePointerSize)
-                                : holder_->GetClass()->GetFirstReferenceInstanceFieldOffset())
-                            << " num_of_ref_fields="
-                            << (holder_->IsClass()
-                                ? holder_->AsClass()->NumReferenceStaticFields()
-                                : holder_->GetClass()->NumReferenceInstanceFields());
+        oss << "Field info: "
+            << " holder=" << holder_
+            << " holder is "
+            << (mark_sweep_->GetHeap()->IsLiveObjectLocked(holder_)
+                ? "alive" : "dead")
+            << " holder_size=" << holder_size
+            << " holder_type=" << holder_->PrettyTypeOf()
+            << " offset=" << offset_.Uint32Value()
+            << " field=" << (field != nullptr ? field->GetName() : "nullptr")
+            << " field_type="
+            << (field != nullptr ? field->GetTypeDescriptor() : "")
+            << " first_ref_field_offset="
+            << (holder_->IsClass()
+                ? holder_->AsClass()->GetFirstReferenceStaticFieldOffset(
+                    kRuntimePointerSize)
+                : holder_->GetClass()->GetFirstReferenceInstanceFieldOffset())
+            << " num_of_ref_fields="
+            << (holder_->IsClass()
+                ? holder_->AsClass()->NumReferenceStaticFields()
+                : holder_->GetClass()->NumReferenceInstanceFields())
+            << std::endl;
         // Print the memory content of the holder.
         for (size_t i = 0; i < holder_size / sizeof(uint32_t); ++i) {
           uint32_t* p = reinterpret_cast<uint32_t*>(holder_);
-          LOG(FATAL_WITHOUT_ABORT) << &p[i] << ": " << "holder+" << (i * sizeof(uint32_t)) << " = "
-                              << std::hex << p[i];
+          oss << &p[i] << ": " << "holder+" << (i * sizeof(uint32_t)) << " = " << std::hex << p[i]
+              << std::endl;
         }
       }
-      PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
-      MemMap::DumpMaps(LOG_STREAM(FATAL_WITHOUT_ABORT), true);
-      LOG(FATAL_WITHOUT_ABORT) << "Attempting see if it's a bad thread root";
-      mark_sweep_->VerifySuspendedThreadRoots();
-      LOG(FATAL) << "Can't mark invalid object";
+      oss << "Attempting see if it's a bad thread root" << std::endl;
+      mark_sweep_->VerifySuspendedThreadRoots(oss);
+      LOG(FATAL) << oss.str();
     }
   }
 
@@ -462,9 +468,9 @@
                                          mirror::Object* holder,
                                          MemberOffset offset) {
   DCHECK(obj != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct state installed.
+    obj->AssertReadBarrierState();
   }
   if (immune_spaces_.IsInImmuneRegion(obj)) {
     if (kCountMarkedObjects) {
@@ -503,9 +509,9 @@
 
 inline bool MarkSweep::MarkObjectParallel(mirror::Object* obj) {
   DCHECK(obj != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct state installed.
+    obj->AssertReadBarrierState();
   }
   if (immune_spaces_.IsInImmuneRegion(obj)) {
     DCHECK(IsMarked(obj) != nullptr);
@@ -567,6 +573,8 @@
 
 class MarkSweep::VerifyRootVisitor : public SingleRootVisitor {
  public:
+  explicit VerifyRootVisitor(std::ostream& os) : os_(os) {}
+
   void VisitRoot(mirror::Object* root, const RootInfo& info) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     // See if the root is on any space bitmap.
@@ -574,14 +582,17 @@
     if (heap->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
       space::LargeObjectSpace* large_object_space = heap->GetLargeObjectsSpace();
       if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-        LOG(FATAL_WITHOUT_ABORT) << "Found invalid root: " << root << " " << info;
+        os_ << "Found invalid root: " << root << " " << info << std::endl;
       }
     }
   }
+
+ private:
+  std::ostream& os_;
 };
 
-void MarkSweep::VerifySuspendedThreadRoots() {
-  VerifyRootVisitor visitor;
+void MarkSweep::VerifySuspendedThreadRoots(std::ostream& os) {
+  VerifyRootVisitor visitor(os);
   Runtime::Current()->GetThreadList()->VisitRootsForSuspendedThreads(&visitor);
 }
 
@@ -608,8 +619,7 @@
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(
-      this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving));
+  Runtime::Current()->VisitConcurrentRoots(this, flags);
 }
 
 class MarkSweep::DelayReferenceReferentVisitor {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 19c2e9a..02cf462 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -98,7 +98,7 @@
       REQUIRES(!mark_stack_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void MarkConcurrentRoots(VisitRootFlags flags)
+  virtual void MarkConcurrentRoots(VisitRootFlags flags)
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES(!mark_stack_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -250,7 +250,7 @@
 
   // Verify the roots of the heap and print out information related to any invalid roots.
   // Called in MarkObject, so may we may not hold the mutator lock.
-  void VerifySuspendedThreadRoots()
+  void VerifySuspendedThreadRoots(std::ostream& os)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 2ff4a3f..a815b83 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -589,13 +589,9 @@
   // references.
   saved_bytes_ +=
       CopyAvoidingDirtyingPages(reinterpret_cast<void*>(forward_address), obj, object_size);
-  if (kUseBakerOrBrooksReadBarrier) {
-    obj->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      DCHECK_EQ(forward_address->GetReadBarrierPointer(), obj);
-      forward_address->SetReadBarrierPointer(forward_address);
-    }
-    forward_address->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    obj->AssertReadBarrierState();
+    forward_address->AssertReadBarrierState();
   }
   DCHECK(to_space_->HasAddress(forward_address) ||
          fallback_space_->HasAddress(forward_address) ||
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index bb7e854..a2dbe3f 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -56,6 +56,19 @@
   RecursiveMarkDirtyObjects(false, accounting::CardTable::kCardDirty - 1);
 }
 
+void StickyMarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  // Visit all runtime roots and clear dirty flags including class loader. This is done to prevent
+  // incorrect class unloading since the GC does not card mark when storing the class during
+  // object allocation. Doing this for each allocation would be slow.
+  // Since the card is not dirty, it means the object may not get scanned. This can cause class
+  // unloading to occur even though the class and class loader are reachable through the object's
+  // class.
+  Runtime::Current()->VisitConcurrentRoots(
+      this,
+      static_cast<VisitRootFlags>(flags | kVisitRootFlagClassLoader));
+}
+
 void StickyMarkSweep::Sweep(bool swap_bitmaps ATTRIBUTE_UNUSED) {
   SweepArray(GetHeap()->GetLiveStack(), false);
 }
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 100ca64..45f912f 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -33,6 +33,12 @@
   StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix = "");
   ~StickyMarkSweep() {}
 
+  virtual void MarkConcurrentRoots(VisitRootFlags flags)
+      OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  protected:
   // Bind the live bits to the mark bits of bitmaps for all spaces, all spaces other than the
   // alloc space will be marked as immune.
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 05ce9c7..54f2210 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -86,11 +86,8 @@
     obj = self->AllocTlab(byte_count);
     DCHECK(obj != nullptr) << "AllocTlab can't fail";
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj.Ptr());
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     bytes_allocated = byte_count;
     usable_size = bytes_allocated;
@@ -102,11 +99,8 @@
       LIKELY(obj != nullptr)) {
     DCHECK(!is_running_on_memory_tool_);
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj.Ptr());
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     usable_size = bytes_allocated;
     pre_fence_visitor(obj, usable_size);
@@ -143,11 +137,8 @@
     DCHECK_GT(bytes_allocated, 0u);
     DCHECK_GT(usable_size, 0u);
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj.Ptr());
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
       // (Note this if statement will be constant folded away for the
@@ -256,7 +247,7 @@
   if (allocator_type != kAllocatorTypeTLAB &&
       allocator_type != kAllocatorTypeRegionTLAB &&
       allocator_type != kAllocatorTypeRosAlloc &&
-      UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+      UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
     return nullptr;
   }
   mirror::Object* ret;
@@ -276,8 +267,9 @@
       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
         // If running on valgrind or asan, we should be using the instrumented path.
         size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
-        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
-                                                      max_bytes_tl_bulk_allocated))) {
+        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
+                                               max_bytes_tl_bulk_allocated,
+                                               kGrow))) {
           return nullptr;
         }
         ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
@@ -286,14 +278,18 @@
         DCHECK(!is_running_on_memory_tool_);
         size_t max_bytes_tl_bulk_allocated =
             rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
-        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
-                                                      max_bytes_tl_bulk_allocated))) {
+        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
+                                               max_bytes_tl_bulk_allocated,
+                                               kGrow))) {
           return nullptr;
         }
         if (!kInstrumented) {
           DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
         }
-        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
+        ret = rosalloc_space_->AllocNonvirtual(self,
+                                               alloc_size,
+                                               bytes_allocated,
+                                               usable_size,
                                                bytes_tl_bulk_allocated);
       }
       break;
@@ -301,22 +297,34 @@
     case kAllocatorTypeDlMalloc: {
       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
         // If running on valgrind, we should be using the instrumented path.
-        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+        ret = dlmalloc_space_->Alloc(self,
+                                     alloc_size,
+                                     bytes_allocated,
+                                     usable_size,
                                      bytes_tl_bulk_allocated);
       } else {
         DCHECK(!is_running_on_memory_tool_);
-        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
+        ret = dlmalloc_space_->AllocNonvirtual(self,
+                                               alloc_size,
+                                               bytes_allocated,
+                                               usable_size,
                                                bytes_tl_bulk_allocated);
       }
       break;
     }
     case kAllocatorTypeNonMoving: {
-      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+      ret = non_moving_space_->Alloc(self,
+                                     alloc_size,
+                                     bytes_allocated,
+                                     usable_size,
                                      bytes_tl_bulk_allocated);
       break;
     }
     case kAllocatorTypeLOS: {
-      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+      ret = large_object_space_->Alloc(self,
+                                       alloc_size,
+                                       bytes_allocated,
+                                       usable_size,
                                        bytes_tl_bulk_allocated);
       // Note that the bump pointer spaces aren't necessarily next to
       // the other continuous spaces like the non-moving alloc space or
@@ -324,80 +332,38 @@
       DCHECK(ret == nullptr || large_object_space_->Contains(ret));
       break;
     }
-    case kAllocatorTypeTLAB: {
-      DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment);
-      if (UNLIKELY(self->TlabSize() < alloc_size)) {
-        const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
-        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) {
-          return nullptr;
-        }
-        // Try allocating a new thread local buffer, if the allocaiton fails the space must be
-        // full so return null.
-        if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
-          return nullptr;
-        }
-        *bytes_tl_bulk_allocated = new_tlab_size;
-      } else {
-        *bytes_tl_bulk_allocated = 0;
-      }
-      // The allocation can't fail.
-      ret = self->AllocTlab(alloc_size);
-      DCHECK(ret != nullptr);
-      *bytes_allocated = alloc_size;
-      *usable_size = alloc_size;
-      break;
-    }
     case kAllocatorTypeRegion: {
       DCHECK(region_space_ != nullptr);
       alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
-      ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+      ret = region_space_->AllocNonvirtual<false>(alloc_size,
+                                                  bytes_allocated,
+                                                  usable_size,
                                                   bytes_tl_bulk_allocated);
       break;
     }
+    case kAllocatorTypeTLAB:
+      FALLTHROUGH_INTENDED;
     case kAllocatorTypeRegionTLAB: {
-      DCHECK(region_space_ != nullptr);
-      DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
+      DCHECK_ALIGNED(alloc_size, kObjectAlignment);
+      static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
+                    "mismatched alignments");
+      static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
+                    "mismatched alignments");
       if (UNLIKELY(self->TlabSize() < alloc_size)) {
-        if (space::RegionSpace::kRegionSize >= alloc_size) {
-          // Non-large. Check OOME for a tlab.
-          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) {
-            // Try to allocate a tlab.
-            if (!region_space_->AllocNewTlab(self)) {
-              // Failed to allocate a tlab. Try non-tlab.
-              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
-                                                          bytes_tl_bulk_allocated);
-              return ret;
-            }
-            *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
-            // Fall-through.
-          } else {
-            // Check OOME for a non-tlab allocation.
-            if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
-              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
-                                                          bytes_tl_bulk_allocated);
-              return ret;
-            } else {
-              // Neither tlab or non-tlab works. Give up.
-              return nullptr;
-            }
-          }
-        } else {
-          // Large. Check OOME.
-          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
-            ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
-                                                        bytes_tl_bulk_allocated);
-            return ret;
-          } else {
-            return nullptr;
-          }
-        }
-      } else {
-        *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
+        // kAllocatorTypeTLAB may be the allocator for region space TLAB if the GC is not marking,
+        // that is why the allocator is not passed down.
+        return AllocWithNewTLAB(self,
+                                alloc_size,
+                                kGrow,
+                                bytes_allocated,
+                                usable_size,
+                                bytes_tl_bulk_allocated);
       }
       // The allocation can't fail.
       ret = self->AllocTlab(alloc_size);
       DCHECK(ret != nullptr);
       *bytes_allocated = alloc_size;
+      *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
       *usable_size = alloc_size;
       break;
     }
@@ -417,15 +383,16 @@
   return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
 }
 
-template <bool kGrow>
-inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
+inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+                                            size_t alloc_size,
+                                            bool grow) {
   size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
     }
     if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) {
-      if (!kGrow) {
+      if (!grow) {
         return true;
       }
       // TODO: Grow for allocation is racy, fix it.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 5de004b..5c219cc 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -264,6 +264,10 @@
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  if (kUseReadBarrier) {
+    CHECK_EQ(foreground_collector_type_, kCollectorTypeCC);
+    CHECK_EQ(background_collector_type_, kCollectorTypeCCBackground);
+  }
   CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
   ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
@@ -1815,7 +1819,7 @@
           break;
         }
         // Try to transition the heap if the allocation failure was due to the space being full.
-        if (!IsOutOfMemoryOnAllocation<false>(allocator, alloc_size)) {
+        if (!IsOutOfMemoryOnAllocation(allocator, alloc_size, /*grow*/ false)) {
           // If we aren't out of memory then the OOM was probably from the non moving space being
           // full. Attempt to disable compaction and turn the main space into a non moving space.
           DisableMovingGc();
@@ -2406,13 +2410,9 @@
     }
     // Copy the object over to its new location. Don't use alloc_size to avoid valgrind error.
     memcpy(reinterpret_cast<void*>(forward_address), obj, obj_size);
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
-      if (kUseBrooksReadBarrier) {
-        DCHECK_EQ(forward_address->GetReadBarrierPointer(), obj);
-        forward_address->SetReadBarrierPointer(forward_address);
-      }
-      forward_address->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
+      forward_address->AssertReadBarrierState();
     }
     return forward_address;
   }
@@ -2742,12 +2742,6 @@
     concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
 
-  // It's time to clear all inline caches, in case some classes can be unloaded.
-  if (((gc_type == collector::kGcTypeFull) || (gc_type == collector::kGcTypePartial)) &&
-      (runtime->GetJit() != nullptr)) {
-    runtime->GetJit()->GetCodeCache()->ClearGcRootsInInlineCaches(self);
-  }
-
   CHECK(collector != nullptr)
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
@@ -4069,7 +4063,6 @@
 }
 
 void Heap::BroadcastForNewAllocationRecords() const {
-  CHECK(kUseReadBarrier);
   // Always broadcast without checking IsAllocTrackingEnabled() because IsAllocTrackingEnabled() may
   // be set to false while some threads are waiting for system weak access in
   // AllocRecordObjectMap::RecordAllocation() and we may fail to wake them up. b/27467554.
@@ -4226,5 +4219,72 @@
   gc_pause_listener_.StoreRelaxed(nullptr);
 }
 
+mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
+                                       size_t alloc_size,
+                                       bool grow,
+                                       size_t* bytes_allocated,
+                                       size_t* usable_size,
+                                       size_t* bytes_tl_bulk_allocated) {
+  const AllocatorType allocator_type = GetCurrentAllocator();
+  if (allocator_type == kAllocatorTypeTLAB) {
+    DCHECK(bump_pointer_space_ != nullptr);
+    const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+    if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
+      return nullptr;
+    }
+    // Try allocating a new thread local buffer, if the allocation fails the space must be
+    // full so return null.
+    if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
+      return nullptr;
+    }
+    *bytes_tl_bulk_allocated = new_tlab_size;
+  } else {
+    DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
+    DCHECK(region_space_ != nullptr);
+    if (space::RegionSpace::kRegionSize >= alloc_size) {
+      // Non-large (alloc_size <= kRegionSize). Check OOME for a region-sized tlab.
+      if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
+                                            space::RegionSpace::kRegionSize,
+                                            grow))) {
+        // Try to allocate a tlab.
+        if (!region_space_->AllocNewTlab(self)) {
+          // Failed to allocate a tlab. Try a direct (non-tlab) region space allocation.
+          return region_space_->AllocNonvirtual<false>(alloc_size,
+                                                       bytes_allocated,
+                                                       usable_size,
+                                                       bytes_tl_bulk_allocated);
+        }
+        *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+        // Fall-through to using the TLAB below.
+      } else {
+        // A region-sized tlab would be OOM. Check OOME for just alloc_size (non-tlab).
+        if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
+          return region_space_->AllocNonvirtual<false>(alloc_size,
+                                                       bytes_allocated,
+                                                       usable_size,
+                                                       bytes_tl_bulk_allocated);
+        }
+        // Neither tlab nor non-tlab works. Give up.
+        return nullptr;
+      }
+    } else {
+      // Large (alloc_size > kRegionSize). Check OOME, then allocate directly (non-tlab).
+      if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
+        return region_space_->AllocNonvirtual<false>(alloc_size,
+                                                     bytes_allocated,
+                                                     usable_size,
+                                                     bytes_tl_bulk_allocated);
+      }
+      return nullptr;
+    }
+  }
+  // The TLAB was refilled above; allocate from it (DCHECKed below to succeed) and return.
+  mirror::Object* ret = self->AllocTlab(alloc_size);
+  DCHECK(ret != nullptr);
+  *bytes_allocated = alloc_size;
+  *usable_size = alloc_size;
+  return ret;
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e8eb69e..3a8e29b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -797,7 +797,6 @@
       REQUIRES(!Locks::alloc_tracker_lock_);
 
   void BroadcastForNewAllocationRecords() const
-      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::alloc_tracker_lock_);
 
   void DisableGCForShutdown() REQUIRES(!*gc_complete_lock_);
@@ -855,6 +854,10 @@
         allocator_type != kAllocatorTypeRegionTLAB;
   }
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
+    if (kUseReadBarrier) {
+      // Read barrier may have the TLAB allocator but is always concurrent. TODO: clean this up.
+      return true;
+    }
     return
         allocator_type != kAllocatorTypeBumpPointer &&
         allocator_type != kAllocatorTypeTLAB;
@@ -924,11 +927,20 @@
                                               size_t* bytes_tl_bulk_allocated)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::Object* AllocWithNewTLAB(Thread* self,
+                                   size_t alloc_size,
+                                   bool grow,
+                                   size_t* bytes_allocated,
+                                   size_t* usable_size,
+                                   size_t* bytes_tl_bulk_allocated)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template <bool kGrow>
-  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+                                               size_t alloc_size,
+                                               bool grow);
 
   // Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
   void RunFinalization(JNIEnv* env, uint64_t timeout);
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 798ecd3..2cde7d5 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -55,7 +55,6 @@
 }
 
 void ReferenceProcessor::BroadcastForSlowPath(Thread* self) {
-  CHECK(kUseReadBarrier);
   MutexLock mu(self, *Locks::reference_processor_lock_);
   condition_.Broadcast(self);
 }
@@ -99,6 +98,9 @@
         }
       }
     }
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     condition_.WaitHoldingLocks(self);
   }
   return reference->GetReferent();
@@ -270,6 +272,9 @@
   // Wait untul we are done processing reference.
   while ((!kUseReadBarrier && SlowPathEnabled()) ||
          (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     condition_.WaitHoldingLocks(self);
   }
   // At this point, since the sentinel of the reference is live, it is guaranteed to not be
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 4e6f7da..a0eb197 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -75,19 +75,19 @@
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
-    if (rb_ptr == ReadBarrier::GrayPtr()) {
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+    uint32_t rb_state = ref->GetReadBarrierState();
+    if (rb_state == ReadBarrier::GrayState()) {
+      ref->AtomicSetReadBarrierState(ReadBarrier::GrayState(), ReadBarrier::WhiteState());
+      CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState());
     } else {
       // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
       // find it here, which is OK.
-      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
+      CHECK_EQ(rb_state, ReadBarrier::WhiteState()) << "ref=" << ref << " rb_state=" << rb_state;
       ObjPtr<mirror::Object> referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
         CHECK(concurrent_copying->IsInToSpace(referent.Ptr()))
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer()
+            << "ref=" << ref << " rb_state=" << ref->GetReadBarrierState()
             << " referent=" << referent;
       }
     }
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 6035406..6019540 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -399,8 +399,8 @@
     auto* obj = reinterpret_cast<mirror::Object*>(current);
     CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
     CHECK(live_bitmap_->Test(obj)) << obj->PrettyTypeOf();
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     current += RoundUp(obj->SizeOf(), kObjectAlignment);
   }
@@ -1002,7 +1002,7 @@
         mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
         // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
         // contents.
-        if (iftable != nullptr && IsInAppImage(iftable)) {
+        if (IsInAppImage(iftable)) {
           operator()(iftable);
           for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
             if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
@@ -1606,7 +1606,7 @@
 
   std::ostringstream oss;
   bool first = true;
-  for (auto msg : error_msgs) {
+  for (const auto& msg : error_msgs) {
     if (!first) {
       oss << "\n    ";
     }
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index bbc634d..3e79223 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -116,18 +116,17 @@
                                                   size_t* bytes_tl_bulk_allocated) {
   DCHECK(IsAllocated() && IsInToSpace());
   DCHECK_ALIGNED(num_bytes, kAlignment);
-  Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_);
   uint8_t* old_top;
   uint8_t* new_top;
   do {
-    old_top = atomic_top->LoadRelaxed();
+    old_top = top_.LoadRelaxed();
     new_top = old_top + num_bytes;
     if (UNLIKELY(new_top > end_)) {
       return nullptr;
     }
-  } while (!atomic_top->CompareExchangeWeakSequentiallyConsistent(old_top, new_top));
-  reinterpret_cast<Atomic<uint64_t>*>(&objects_allocated_)->FetchAndAddSequentiallyConsistent(1);
-  DCHECK_LE(atomic_top->LoadRelaxed(), end_);
+  } while (!top_.CompareExchangeWeakRelaxed(old_top, new_top));
+  objects_allocated_.FetchAndAddRelaxed(1);
+  DCHECK_LE(Top(), end_);
   DCHECK_LT(old_top, end_);
   DCHECK_LE(new_top, end_);
   *bytes_allocated = num_bytes;
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 35bc369..8077319 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -57,7 +57,7 @@
   regions_.reset(new Region[num_regions_]);
   uint8_t* region_addr = mem_map->Begin();
   for (size_t i = 0; i < num_regions_; ++i, region_addr += kRegionSize) {
-    regions_[i] = Region(i, region_addr, region_addr + kRegionSize);
+    regions_[i].Init(i, region_addr, region_addr + kRegionSize);
   }
   mark_bitmap_.reset(
       accounting::ContinuousSpaceBitmap::Create("region space live bitmap", Begin(), Capacity()));
@@ -72,7 +72,6 @@
     }
     CHECK_EQ(regions_[num_regions_ - 1].End(), Limit());
   }
-  full_region_ = Region();
   DCHECK(!full_region_.IsFree());
   DCHECK(full_region_.IsAllocated());
   current_region_ = &full_region_;
@@ -346,7 +345,7 @@
 void RegionSpace::RecordAlloc(mirror::Object* ref) {
   CHECK(ref != nullptr);
   Region* r = RefToRegion(ref);
-  reinterpret_cast<Atomic<uint64_t>*>(&r->objects_allocated_)->FetchAndAddSequentiallyConsistent(1);
+  r->objects_allocated_.FetchAndAddSequentiallyConsistent(1);
 }
 
 bool RegionSpace::AllocNewTlab(Thread* self) {
@@ -424,7 +423,8 @@
 }
 
 void RegionSpace::Region::Dump(std::ostream& os) const {
-  os << "Region[" << idx_ << "]=" << reinterpret_cast<void*>(begin_) << "-" << reinterpret_cast<void*>(top_)
+  os << "Region[" << idx_ << "]=" << reinterpret_cast<void*>(begin_) << "-"
+     << reinterpret_cast<void*>(Top())
      << "-" << reinterpret_cast<void*>(end_)
      << " state=" << static_cast<uint>(state_) << " type=" << static_cast<uint>(type_)
      << " objects_allocated=" << objects_allocated_
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 381ccfa..f3b9595 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -246,11 +246,19 @@
           objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)),
           is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {}
 
-    Region(size_t idx, uint8_t* begin, uint8_t* end)
-        : idx_(idx), begin_(begin), top_(begin), end_(end),
-          state_(RegionState::kRegionStateFree), type_(RegionType::kRegionTypeNone),
-          objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)),
-          is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {
+    void Init(size_t idx, uint8_t* begin, uint8_t* end) {  // Sets up a free, empty region.
+      idx_ = idx;
+      begin_ = begin;
+      top_.StoreRelaxed(begin);
+      end_ = end;
+      state_ = RegionState::kRegionStateFree;
+      type_ = RegionType::kRegionTypeNone;
+      objects_allocated_.StoreRelaxed(0);
+      alloc_time_ = 0;
+      live_bytes_ = static_cast<size_t>(-1);
+      is_newly_allocated_ = false;
+      is_a_tlab_ = false;
+      thread_ = nullptr;
       DCHECK_LT(begin, end);
       DCHECK_EQ(static_cast<size_t>(end - begin), kRegionSize);
     }
@@ -264,16 +272,13 @@
     }
 
     void Clear() {
-      top_ = begin_;
+      top_.StoreRelaxed(begin_);
       state_ = RegionState::kRegionStateFree;
       type_ = RegionType::kRegionTypeNone;
-      objects_allocated_ = 0;
+      objects_allocated_.StoreRelaxed(0);
       alloc_time_ = 0;
       live_bytes_ = static_cast<size_t>(-1);
-      if (!kMadviseZeroes) {
-        memset(begin_, 0, end_ - begin_);
-      }
-      madvise(begin_, end_ - begin_, MADV_DONTNEED);
+      ZeroAndReleasePages(begin_, end_ - begin_);
       is_newly_allocated_ = false;
       is_a_tlab_ = false;
       thread_ = nullptr;
@@ -287,8 +292,8 @@
       bool is_free = state_ == RegionState::kRegionStateFree;
       if (is_free) {
         DCHECK(IsInNoSpace());
-        DCHECK_EQ(begin_, top_);
-        DCHECK_EQ(objects_allocated_, 0U);
+        DCHECK_EQ(begin_, Top());
+        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
       }
       return is_free;
     }
@@ -328,7 +333,7 @@
     bool IsLarge() const {
       bool is_large = state_ == RegionState::kRegionStateLarge;
       if (is_large) {
-        DCHECK_LT(begin_ + 1 * MB, top_);
+        DCHECK_LT(begin_ + 1 * MB, Top());
       }
       return is_large;
     }
@@ -337,7 +342,7 @@
     bool IsLargeTail() const {
       bool is_large_tail = state_ == RegionState::kRegionStateLargeTail;
       if (is_large_tail) {
-        DCHECK_EQ(begin_, top_);
+        DCHECK_EQ(begin_, Top());
       }
       return is_large_tail;
     }
@@ -395,15 +400,15 @@
 
     size_t BytesAllocated() const {
       if (IsLarge()) {
-        DCHECK_LT(begin_ + kRegionSize, top_);
-        return static_cast<size_t>(top_ - begin_);
+        DCHECK_LT(begin_ + kRegionSize, Top());
+        return static_cast<size_t>(Top() - begin_);
       } else if (IsLargeTail()) {
-        DCHECK_EQ(begin_, top_);
+        DCHECK_EQ(begin_, Top());
         return 0;
       } else {
         DCHECK(IsAllocated()) << static_cast<uint>(state_);
-        DCHECK_LE(begin_, top_);
-        size_t bytes = static_cast<size_t>(top_ - begin_);
+        DCHECK_LE(begin_, Top());
+        size_t bytes = static_cast<size_t>(Top() - begin_);
         DCHECK_LE(bytes, kRegionSize);
         return bytes;
       }
@@ -411,12 +416,12 @@
 
     size_t ObjectsAllocated() const {
       if (IsLarge()) {
-        DCHECK_LT(begin_ + 1 * MB, top_);
-        DCHECK_EQ(objects_allocated_, 0U);
+        DCHECK_LT(begin_ + 1 * MB, Top());
+        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
         return 1;
       } else if (IsLargeTail()) {
-        DCHECK_EQ(begin_, top_);
-        DCHECK_EQ(objects_allocated_, 0U);
+        DCHECK_EQ(begin_, Top());
+        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
         return 0;
       } else {
         DCHECK(IsAllocated()) << static_cast<uint>(state_);
@@ -428,12 +433,12 @@
       return begin_;
     }
 
-    uint8_t* Top() const {
-      return top_;
+    ALWAYS_INLINE uint8_t* Top() const {
+      return top_.LoadRelaxed();
     }
 
     void SetTop(uint8_t* new_top) {
-      top_ = new_top;
+      top_.StoreRelaxed(new_top);
     }
 
     uint8_t* End() const {
@@ -448,27 +453,26 @@
 
     void RecordThreadLocalAllocations(size_t num_objects, size_t num_bytes) {
       DCHECK(IsAllocated());
-      DCHECK_EQ(objects_allocated_, 0U);
-      DCHECK_EQ(top_, end_);
-      objects_allocated_ = num_objects;
-      top_ = begin_ + num_bytes;
-      DCHECK_EQ(top_, end_);
+      DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
+      DCHECK_EQ(Top(), end_);
+      objects_allocated_.StoreRelaxed(num_objects);
+      top_.StoreRelaxed(begin_ + num_bytes);
+      DCHECK_EQ(Top(), end_);
     }
 
    private:
-    size_t idx_;                   // The region's index in the region space.
-    uint8_t* begin_;               // The begin address of the region.
-    // Can't use Atomic<uint8_t*> as Atomic's copy operator is implicitly deleted.
-    uint8_t* top_;                 // The current position of the allocation.
-    uint8_t* end_;                 // The end address of the region.
-    RegionState state_;            // The region state (see RegionState).
-    RegionType type_;              // The region type (see RegionType).
-    uint64_t objects_allocated_;   // The number of objects allocated.
-    uint32_t alloc_time_;          // The allocation time of the region.
-    size_t live_bytes_;            // The live bytes. Used to compute the live percent.
-    bool is_newly_allocated_;      // True if it's allocated after the last collection.
-    bool is_a_tlab_;               // True if it's a tlab.
-    Thread* thread_;               // The owning thread if it's a tlab.
+    size_t idx_;                        // The region's index in the region space.
+    uint8_t* begin_;                    // The begin address of the region.
+    Atomic<uint8_t*> top_;              // The current position of the allocation.
+    uint8_t* end_;                      // The end address of the region.
+    RegionState state_;                 // The region state (see RegionState).
+    RegionType type_;                   // The region type (see RegionType).
+    Atomic<size_t> objects_allocated_;  // The number of objects allocated.
+    uint32_t alloc_time_;               // The allocation time of the region.
+    size_t live_bytes_;                 // The live bytes. Used to compute the live percent.
+    bool is_newly_allocated_;           // True if it's allocated after the last collection.
+    bool is_a_tlab_;                    // True if it's a tlab.
+    Thread* thread_;                    // The owning thread if it's a tlab.
 
     friend class RegionSpace;
   };
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 7778871..cbb3d73 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -108,13 +108,10 @@
     EXPECT_GE(size, SizeOfZeroLengthByteArray());
     EXPECT_TRUE(byte_array_class != nullptr);
     o->SetClass(byte_array_class);
-    if (kUseBakerOrBrooksReadBarrier) {
+    if (kUseBakerReadBarrier) {
       // Like the proper heap object allocation, install and verify
-      // the correct read barrier pointer.
-      if (kUseBrooksReadBarrier) {
-        o->SetReadBarrierPointer(o);
-      }
-      o->AssertReadBarrierPointer();
+      // the correct read barrier state.
+      o->AssertReadBarrierState();
     }
     mirror::Array* arr = o->AsArray<kVerifyNone>();
     size_t header_size = SizeOfZeroLengthByteArray();
diff --git a/runtime/gc/system_weak.h b/runtime/gc/system_weak.h
index 3910a28..e5cddfc 100644
--- a/runtime/gc/system_weak.h
+++ b/runtime/gc/system_weak.h
@@ -30,7 +30,8 @@
 
   virtual void Allow() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
   virtual void Disallow() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
-  virtual void Broadcast() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
+  // See Runtime::BroadcastForNewSystemWeaks for the broadcast_for_checkpoint definition.
+  virtual void Broadcast(bool broadcast_for_checkpoint) = 0;
 
   virtual void Sweep(IsMarkedVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 };
@@ -61,19 +62,27 @@
     allow_new_system_weak_ = false;
   }
 
-  void Broadcast() OVERRIDE
-      REQUIRES_SHARED(Locks::mutator_lock_)
+  void Broadcast(bool broadcast_for_checkpoint ATTRIBUTE_UNUSED) OVERRIDE
       REQUIRES(!allow_disallow_lock_) {
-    CHECK(kUseReadBarrier);
     MutexLock mu(Thread::Current(), allow_disallow_lock_);
     new_weak_condition_.Broadcast(Thread::Current());
   }
 
+  // WARNING: For lock annotations only. Always returns nullptr; never dereference the result.
+  Mutex* GetAllowDisallowLock() const RETURN_CAPABILITY(allow_disallow_lock_) {
+    return nullptr;
+  }
+
  protected:
-  void Wait(Thread* self) REQUIRES_SHARED(allow_disallow_lock_) {
+  void Wait(Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
     // Wait for GC's sweeping to complete and allow new records
     while (UNLIKELY((!kUseReadBarrier && !allow_new_system_weak_) ||
                     (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+      // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+      // presence of threads blocking for weak ref access.
+      self->CheckEmptyCheckpoint();
       new_weak_condition_.WaitHoldingLocks(self);
     }
   }
diff --git a/runtime/gc/system_weak_test.cc b/runtime/gc/system_weak_test.cc
index af8a444..9b601c0 100644
--- a/runtime/gc/system_weak_test.cc
+++ b/runtime/gc/system_weak_test.cc
@@ -58,12 +58,14 @@
     disallow_count_++;
   }
 
-  void Broadcast() OVERRIDE
-      REQUIRES_SHARED(Locks::mutator_lock_)
+  void Broadcast(bool broadcast_for_checkpoint) OVERRIDE
       REQUIRES(!allow_disallow_lock_) {
-    SystemWeakHolder::Broadcast();
+    SystemWeakHolder::Broadcast(broadcast_for_checkpoint);
 
-    allow_count_++;
+    if (!broadcast_for_checkpoint) {
+      // Don't count the broadcasts for running checkpoints.
+      allow_count_++;
+    }
   }
 
   void Sweep(IsMarkedVisitor* visitor) OVERRIDE
diff --git a/runtime/generate-operator-out.py b/runtime/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/runtime/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 03f5bf6..f13ff8c 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -52,6 +52,8 @@
 DEFINE_CHECK_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED), (static_cast<uint32_t>((art::mirror::Class::kStatusInitialized))))
 #define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), (static_cast<uint32_t>((art::kAccClassIsFinalizable))))
+#define ACCESS_FLAGS_CLASS_IS_INTERFACE 0x200
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_INTERFACE), (static_cast<uint32_t>((art::kAccInterface))))
 #define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 0x1f
 DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT), (static_cast<uint32_t>((art::MostSignificantBit(art::kAccClassIsFinalizable)))))
 #define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
@@ -96,6 +98,12 @@
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
 #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddress)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW 0x40000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddressOverflow)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), (static_cast<uint32_t>(art::LockWord::kForwardingAddressShift)))
 #define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
 #define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
@@ -134,6 +142,10 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_REQUEST), (static_cast<int32_t>((art::kSuspendRequest))))
 #define THREAD_CHECKPOINT_REQUEST 2
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kCheckpointRequest))))
+#define THREAD_EMPTY_CHECKPOINT_REQUEST 4
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_EMPTY_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kEmptyCheckpointRequest))))
+#define THREAD_SUSPEND_OR_CHECKPOINT_REQUEST 7
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest))))
 #define JIT_CHECK_OSR (-1)
 DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
 #define JIT_HOTNESS_DISABLE (-2)
diff --git a/runtime/handle.h b/runtime/handle.h
index d33d4a6..e4b6d29 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -42,13 +42,9 @@
   Handle() : reference_(nullptr) {
   }
 
-  ALWAYS_INLINE Handle(const Handle<T>& handle) : reference_(handle.reference_) {
-  }
+  ALWAYS_INLINE Handle(const Handle<T>& handle) = default;
 
-  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle) {
-    reference_ = handle.reference_;
-    return *this;
-  }
+  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle) = default;
 
   ALWAYS_INLINE explicit Handle(StackReference<T>* reference) : reference_(reference) {
   }
@@ -65,6 +61,10 @@
     return down_cast<T*>(reference_->AsMirrorPtr());
   }
 
+  ALWAYS_INLINE bool IsNull() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Get() == nullptr;
+  }
+
   ALWAYS_INLINE jobject ToJObject() const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
       // Special case so that we work with null handles.
@@ -109,15 +109,10 @@
   }
 
   ALWAYS_INLINE MutableHandle(const MutableHandle<T>& handle)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      : Handle<T>(handle.reference_) {
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_) = default;
 
   ALWAYS_INLINE MutableHandle<T>& operator=(const MutableHandle<T>& handle)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    Handle<T>::operator=(handle);
-    return *this;
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_) = default;
 
   ALWAYS_INLINE explicit MutableHandle(StackReference<T>* reference)
       REQUIRES_SHARED(Locks::mutator_lock_)
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index 8a0aba6..adb7d8a 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -69,7 +69,7 @@
         number_of_references_(num_references) {}
 
   // Variable sized constructor.
-  BaseHandleScope(BaseHandleScope* link)
+  explicit BaseHandleScope(BaseHandleScope* link)
       : link_(link),
         number_of_references_(kNumReferencesVariableSized) {}
 
diff --git a/runtime/handle_scope_test.cc b/runtime/handle_scope_test.cc
index 92063c4..aab1d9c 100644
--- a/runtime/handle_scope_test.cc
+++ b/runtime/handle_scope_test.cc
@@ -14,15 +14,27 @@
  * limitations under the License.
  */
 
+#include <type_traits>
+
 #include "base/enums.h"
 #include "common_runtime_test.h"
 #include "gtest/gtest.h"
+#include "handle.h"
 #include "handle_scope-inl.h"
+#include "mirror/object.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 
 namespace art {
 
+// Handles are value objects and should be trivially copyable.
+static_assert(std::is_trivially_copyable<Handle<mirror::Object>>::value,
+              "Handle should be trivially copyable");
+static_assert(std::is_trivially_copyable<MutableHandle<mirror::Object>>::value,
+              "MutableHandle should be trivially copyable");
+static_assert(std::is_trivially_copyable<ScopedNullHandle<mirror::Object>>::value,
+              "ScopedNullHandle should be trivially copyable");
+
 class HandleScopeTest : public CommonRuntimeTest {};
 
 // Test the offsets computed for members of HandleScope. Because of cross-compiling
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 2336759..8cbe491 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -574,9 +574,9 @@
   }
 
   void WriteStringTable() {
-    for (const std::pair<std::string, HprofStringId>& p : strings_) {
+    for (const auto& p : strings_) {
       const std::string& string = p.first;
-      const size_t id = p.second;
+      const HprofStringId id = p.second;
 
       output_->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
 
diff --git a/runtime/image.cc b/runtime/image.cc
index 299d5fd..52c9f4e 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '1', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '3', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index d4c322e..870d1ae 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -630,7 +630,7 @@
 }
 
 static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
-  thread->ResetQuickAllocEntryPointsForThread();
+  thread->ResetQuickAllocEntryPointsForThread(kUseReadBarrier && thread->GetIsGcMarking());
 }
 
 void Instrumentation::SetEntrypointsInstrumented(bool instrumented) {
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index a61a187..9c05d3c 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -63,9 +63,9 @@
     strong_interns_.VisitRoots(visitor);
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_strong_intern_roots_) {
-      mirror::String* old_ref = root.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::String> old_ref = root.Read<kWithoutReadBarrier>();
       root.VisitRoot(visitor, RootInfo(kRootInternedString));
-      mirror::String* new_ref = root.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::String> new_ref = root.Read<kWithoutReadBarrier>();
       if (new_ref != old_ref) {
         // The GC moved a root in the log. Need to search the strong interns and update the
         // corresponding object. This is slow, but luckily for us, this may only happen with a
@@ -86,17 +86,17 @@
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::LookupWeak(Thread* self, mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupWeak(Thread* self, ObjPtr<mirror::String> s) {
   MutexLock mu(self, *Locks::intern_table_lock_);
   return LookupWeakLocked(s);
 }
 
-mirror::String* InternTable::LookupStrong(Thread* self, mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self, ObjPtr<mirror::String> s) {
   MutexLock mu(self, *Locks::intern_table_lock_);
   return LookupStrongLocked(s);
 }
 
-mirror::String* InternTable::LookupStrong(Thread* self,
+ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self,
                                           uint32_t utf16_length,
                                           const char* utf8_data) {
   DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
@@ -107,11 +107,11 @@
   return strong_interns_.Find(string);
 }
 
-mirror::String* InternTable::LookupWeakLocked(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupWeakLocked(ObjPtr<mirror::String> s) {
   return weak_interns_.Find(s);
 }
 
-mirror::String* InternTable::LookupStrongLocked(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupStrongLocked(ObjPtr<mirror::String> s) {
   return strong_interns_.Find(s);
 }
 
@@ -121,7 +121,7 @@
   strong_interns_.AddNewTable();
 }
 
-mirror::String* InternTable::InsertStrong(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InsertStrong(ObjPtr<mirror::String> s) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordStrongStringInsertion(s);
@@ -133,7 +133,7 @@
   return s;
 }
 
-mirror::String* InternTable::InsertWeak(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InsertWeak(ObjPtr<mirror::String> s) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringInsertion(s);
@@ -142,11 +142,11 @@
   return s;
 }
 
-void InternTable::RemoveStrong(mirror::String* s) {
+void InternTable::RemoveStrong(ObjPtr<mirror::String> s) {
   strong_interns_.Remove(s);
 }
 
-void InternTable::RemoveWeak(mirror::String* s) {
+void InternTable::RemoveWeak(ObjPtr<mirror::String> s) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringRemoval(s);
@@ -155,19 +155,22 @@
 }
 
 // Insert/remove methods used to undo changes made during an aborted transaction.
-mirror::String* InternTable::InsertStrongFromTransaction(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InsertStrongFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   return InsertStrong(s);
 }
-mirror::String* InternTable::InsertWeakFromTransaction(mirror::String* s) {
+
+ObjPtr<mirror::String> InternTable::InsertWeakFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   return InsertWeak(s);
 }
-void InternTable::RemoveStrongFromTransaction(mirror::String* s) {
+
+void InternTable::RemoveStrongFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   RemoveStrong(s);
 }
-void InternTable::RemoveWeakFromTransaction(mirror::String* s) {
+
+void InternTable::RemoveWeakFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   RemoveWeak(s);
 }
@@ -185,7 +188,6 @@
 }
 
 void InternTable::BroadcastForNewInterns() {
-  CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::intern_table_lock_);
   weak_intern_condition_.Broadcast(self);
@@ -196,14 +198,17 @@
   {
     ScopedThreadSuspension sts(self, kWaitingWeakGcRootRead);
     MutexLock mu(self, *Locks::intern_table_lock_);
-    while (weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) {
+    while ((!kUseReadBarrier && weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) ||
+           (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
       weak_intern_condition_.Wait(self);
     }
   }
   Locks::intern_table_lock_->ExclusiveLock(self);
 }
 
-mirror::String* InternTable::Insert(mirror::String* s, bool is_strong, bool holding_locks) {
+ObjPtr<mirror::String> InternTable::Insert(ObjPtr<mirror::String> s,
+                                           bool is_strong,
+                                           bool holding_locks) {
   if (s == nullptr) {
     return nullptr;
   }
@@ -222,7 +227,7 @@
       }
     }
     // Check the strong table for a match.
-    mirror::String* strong = LookupStrongLocked(s);
+    ObjPtr<mirror::String> strong = LookupStrongLocked(s);
     if (strong != nullptr) {
       return strong;
     }
@@ -244,7 +249,7 @@
     CHECK(self->GetWeakRefAccessEnabled());
   }
   // There is no match in the strong table, check the weak table.
-  mirror::String* weak = LookupWeakLocked(s);
+  ObjPtr<mirror::String> weak = LookupWeakLocked(s);
   if (weak != nullptr) {
     if (is_strong) {
       // A match was found in the weak table. Promote to the strong table.
@@ -257,11 +262,11 @@
   return is_strong ? InsertStrong(s) : InsertWeak(s);
 }
 
-mirror::String* InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
+ObjPtr<mirror::String> InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
   DCHECK(utf8_data != nullptr);
   Thread* self = Thread::Current();
   // Try to avoid allocation.
-  mirror::String* s = LookupStrong(self, utf16_length, utf8_data);
+  ObjPtr<mirror::String> s = LookupStrong(self, utf16_length, utf8_data);
   if (s != nullptr) {
     return s;
   }
@@ -269,25 +274,25 @@
       self, utf16_length, utf8_data));
 }
 
-mirror::String* InternTable::InternStrong(const char* utf8_data) {
+ObjPtr<mirror::String> InternTable::InternStrong(const char* utf8_data) {
   DCHECK(utf8_data != nullptr);
   return InternStrong(mirror::String::AllocFromModifiedUtf8(Thread::Current(), utf8_data));
 }
 
-mirror::String* InternTable::InternStrongImageString(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InternStrongImageString(ObjPtr<mirror::String> s) {
   // May be holding the heap bitmap lock.
   return Insert(s, true, true);
 }
 
-mirror::String* InternTable::InternStrong(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InternStrong(ObjPtr<mirror::String> s) {
   return Insert(s, true, false);
 }
 
-mirror::String* InternTable::InternWeak(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InternWeak(ObjPtr<mirror::String> s) {
   return Insert(s, false, false);
 }
 
-bool InternTable::ContainsWeak(mirror::String* s) {
+bool InternTable::ContainsWeak(ObjPtr<mirror::String> s) {
   return LookupWeak(Thread::Current(), s) == s;
 }
 
@@ -314,7 +319,7 @@
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  return static_cast<size_t>(root.Read()->GetHashCode());
+  return static_cast<size_t>(root.Read<kWithoutReadBarrier>()->GetHashCode());
 }
 
 bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a,
@@ -322,7 +327,7 @@
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  return a.Read()->Equals(b.Read());
+  return a.Read<kWithoutReadBarrier>()->Equals(b.Read<kWithoutReadBarrier>());
 }
 
 bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a,
@@ -330,7 +335,7 @@
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  mirror::String* a_string = a.Read();
+  ObjPtr<mirror::String> a_string = a.Read<kWithoutReadBarrier>();
   uint32_t a_length = static_cast<uint32_t>(a_string->GetLength());
   if (a_length != b.GetUtf16Length()) {
     return false;
@@ -392,7 +397,7 @@
   return table_to_write->WriteToMemory(ptr);
 }
 
-void InternTable::Table::Remove(mirror::String* s) {
+void InternTable::Table::Remove(ObjPtr<mirror::String> s) {
   for (UnorderedSet& table : tables_) {
     auto it = table.Find(GcRoot<mirror::String>(s));
     if (it != table.end()) {
@@ -403,7 +408,7 @@
   LOG(FATAL) << "Attempting to remove non-interned string " << s->ToModifiedUtf8();
 }
 
-mirror::String* InternTable::Table::Find(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::Table::Find(ObjPtr<mirror::String> s) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
   for (UnorderedSet& table : tables_) {
     auto it = table.Find(GcRoot<mirror::String>(s));
@@ -414,7 +419,7 @@
   return nullptr;
 }
 
-mirror::String* InternTable::Table::Find(const Utf8String& string) {
+ObjPtr<mirror::String> InternTable::Table::Find(const Utf8String& string) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
   for (UnorderedSet& table : tables_) {
     auto it = table.Find(string);
@@ -429,7 +434,7 @@
   tables_.push_back(UnorderedSet());
 }
 
-void InternTable::Table::Insert(mirror::String* s) {
+void InternTable::Table::Insert(ObjPtr<mirror::String> s) {
   // Always insert the last table, the image tables are before and we avoid inserting into these
   // to prevent dirty pages.
   DCHECK(!tables_.empty());
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 30ff55d..f661d9f 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -57,43 +57,44 @@
   InternTable();
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  mirror::String* InternStrong(int32_t utf16_length, const char* utf8_data)
+  ObjPtr<mirror::String> InternStrong(int32_t utf16_length, const char* utf8_data)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
   // cannot be running while we are doing image writing. Maybe be called while while holding a
   // lock since there will not be thread suspension.
-  mirror::String* InternStrongImageString(mirror::String* s)
+  ObjPtr<mirror::String> InternStrongImageString(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  mirror::String* InternStrong(const char* utf8_data) REQUIRES_SHARED(Locks::mutator_lock_)
+  ObjPtr<mirror::String> InternStrong(const char* utf8_data) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  mirror::String* InternStrong(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+  ObjPtr<mirror::String> InternStrong(ObjPtr<mirror::String> s)
+      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   // Interns a potentially new string in the 'weak' table. May cause thread suspension.
-  mirror::String* InternWeak(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+  ObjPtr<mirror::String> InternWeak(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   void SweepInternTableWeaks(IsMarkedVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::intern_table_lock_);
 
-  bool ContainsWeak(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+  bool ContainsWeak(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::intern_table_lock_);
 
   // Lookup a strong intern, returns null if not found.
-  mirror::String* LookupStrong(Thread* self, mirror::String* s)
+  ObjPtr<mirror::String> LookupStrong(Thread* self, ObjPtr<mirror::String> s)
       REQUIRES(!Locks::intern_table_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  mirror::String* LookupStrong(Thread* self, uint32_t utf16_length, const char* utf8_data)
+  ObjPtr<mirror::String> LookupStrong(Thread* self, uint32_t utf16_length, const char* utf8_data)
       REQUIRES(!Locks::intern_table_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Lookup a weak intern, returns null if not found.
-  mirror::String* LookupWeak(Thread* self, mirror::String* s)
+  ObjPtr<mirror::String> LookupWeak(Thread* self, ObjPtr<mirror::String> s)
       REQUIRES(!Locks::intern_table_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -111,7 +112,7 @@
 
   void DumpForSigQuit(std::ostream& os) const REQUIRES(!Locks::intern_table_lock_);
 
-  void BroadcastForNewInterns() REQUIRES_SHARED(Locks::mutator_lock_);
+  void BroadcastForNewInterns();
 
   // Adds all of the resolved image strings from the image spaces into the intern table. The
   // advantage of doing this is preventing expensive DexFile::FindStringId calls. Sets
@@ -181,13 +182,13 @@
   class Table {
    public:
     Table();
-    mirror::String* Find(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+    ObjPtr<mirror::String> Find(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
-    mirror::String* Find(const Utf8String& string) REQUIRES_SHARED(Locks::mutator_lock_)
+    ObjPtr<mirror::String> Find(const Utf8String& string) REQUIRES_SHARED(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
-    void Insert(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+    void Insert(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
-    void Remove(mirror::String* s)
+    void Remove(ObjPtr<mirror::String> s)
         REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
     void VisitRoots(RootVisitor* visitor)
         REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
@@ -221,30 +222,30 @@
   // Insert if non null, otherwise return null. Must be called holding the mutator lock.
   // If holding_locks is true, then we may also hold other locks. If holding_locks is true, then we
   // require GC is not running since it is not safe to wait while holding locks.
-  mirror::String* Insert(mirror::String* s, bool is_strong, bool holding_locks)
+  ObjPtr<mirror::String> Insert(ObjPtr<mirror::String> s, bool is_strong, bool holding_locks)
       REQUIRES(!Locks::intern_table_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::String* LookupStrongLocked(mirror::String* s)
+  ObjPtr<mirror::String> LookupStrongLocked(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* LookupWeakLocked(mirror::String* s)
+  ObjPtr<mirror::String> LookupWeakLocked(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertStrong(mirror::String* s)
+  ObjPtr<mirror::String> InsertStrong(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertWeak(mirror::String* s)
+  ObjPtr<mirror::String> InsertWeak(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveStrong(mirror::String* s)
+  void RemoveStrong(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveWeak(mirror::String* s)
+  void RemoveWeak(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
   // Transaction rollback access.
-  mirror::String* InsertStrongFromTransaction(mirror::String* s)
+  ObjPtr<mirror::String> InsertStrongFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertWeakFromTransaction(mirror::String* s)
+  ObjPtr<mirror::String> InsertWeakFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveStrongFromTransaction(mirror::String* s)
+  void RemoveStrongFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveWeakFromTransaction(mirror::String* s)
+  void RemoveWeakFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
   size_t AddTableFromMemoryLocked(const uint8_t* ptr)
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 74cec57..b91d946 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -193,22 +193,22 @@
   ASSERT_NE(foo.Get(), bar.Get());
   ASSERT_NE(foo.Get(), foobar.Get());
   ASSERT_NE(bar.Get(), foobar.Get());
-  mirror::String* lookup_foo = intern_table.LookupStrong(soa.Self(), 3, "foo");
-  EXPECT_EQ(lookup_foo, foo.Get());
-  mirror::String* lookup_bar = intern_table.LookupStrong(soa.Self(), 3, "bar");
-  EXPECT_EQ(lookup_bar, bar.Get());
-  mirror::String* lookup_foobar = intern_table.LookupStrong(soa.Self(), 6, "foobar");
-  EXPECT_EQ(lookup_foobar, foobar.Get());
-  mirror::String* lookup_foox = intern_table.LookupStrong(soa.Self(), 4, "foox");
+  ObjPtr<mirror::String> lookup_foo = intern_table.LookupStrong(soa.Self(), 3, "foo");
+  EXPECT_OBJ_PTR_EQ(lookup_foo, foo.Get());
+  ObjPtr<mirror::String> lookup_bar = intern_table.LookupStrong(soa.Self(), 3, "bar");
+  EXPECT_OBJ_PTR_EQ(lookup_bar, bar.Get());
+  ObjPtr<mirror::String> lookup_foobar = intern_table.LookupStrong(soa.Self(), 6, "foobar");
+  EXPECT_OBJ_PTR_EQ(lookup_foobar, foobar.Get());
+  ObjPtr<mirror::String> lookup_foox = intern_table.LookupStrong(soa.Self(), 4, "foox");
   EXPECT_TRUE(lookup_foox == nullptr);
-  mirror::String* lookup_fooba = intern_table.LookupStrong(soa.Self(), 5, "fooba");
+  ObjPtr<mirror::String> lookup_fooba = intern_table.LookupStrong(soa.Self(), 5, "fooba");
   EXPECT_TRUE(lookup_fooba == nullptr);
-  mirror::String* lookup_foobaR = intern_table.LookupStrong(soa.Self(), 6, "foobaR");
+  ObjPtr<mirror::String> lookup_foobaR = intern_table.LookupStrong(soa.Self(), 6, "foobaR");
   EXPECT_TRUE(lookup_foobaR == nullptr);
   // Try a hash conflict.
   ASSERT_EQ(ComputeUtf16HashFromModifiedUtf8("foobar", 6),
             ComputeUtf16HashFromModifiedUtf8("foobbS", 6));
-  mirror::String* lookup_foobbS = intern_table.LookupStrong(soa.Self(), 6, "foobbS");
+  ObjPtr<mirror::String> lookup_foobbS = intern_table.LookupStrong(soa.Self(), 6, "foobbS");
   EXPECT_TRUE(lookup_foobbS == nullptr);
 }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index a32c800..1b3d339 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -543,7 +543,7 @@
           ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
           // This is a suspend point. But it's ok since value has been set into shadow_frame.
           ObjPtr<mirror::Class> klass = class_linker->ResolveType(
-              instr->VRegB_21c(), shadow_frame->GetMethod());
+              dex::TypeIndex(instr->VRegB_21c()), shadow_frame->GetMethod());
           DCHECK(klass->IsStringClass());
         }
       } else {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 179e48b..22da07d 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -48,7 +48,8 @@
                                            const ShadowFrame& shadow_frame,
                                            ObjPtr<mirror::Object>& obj,
                                            ArtField* field,
-                                           JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                           JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   field->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   // Report this field access to instrumentation if needed.
@@ -299,6 +300,42 @@
 EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);      // iget-object-quick.
 #undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
 
+static JValue GetFieldValue(const ShadowFrame& shadow_frame,
+                            Primitive::Type field_type,
+                            uint32_t vreg)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  JValue field_value;
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimByte:
+      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimChar:
+      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimShort:
+      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      field_value.SetI(shadow_frame.GetVReg(vreg));
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
+      break;
+    case Primitive::kPrimNot:
+      field_value.SetL(shadow_frame.GetVRegReference(vreg));
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
+  }
+  return field_value;
+}
+
 template<Primitive::Type field_type>
 static JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -337,7 +374,8 @@
                                     const ShadowFrame& shadow_frame,
                                     ObjPtr<mirror::Object>& obj,
                                     ArtField* f,
-                                    size_t vregA) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                    const JValue& value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   f->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
   // Report this field access to instrumentation if needed. Since we only have the offset of
@@ -347,36 +385,35 @@
     StackHandleScope<1> hs(self);
     // Wrap in handle wrapper in case the listener does thread suspension.
     HandleWrapperObjPtr<mirror::Object> h(hs.NewHandleWrapper(&obj));
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
     ObjPtr<mirror::Object> this_object = f->IsStatic() ? nullptr : obj;
     instrumentation->FieldWriteEvent(self, this_object.Ptr(),
                                      shadow_frame.GetMethod(),
                                      shadow_frame.GetDexPC(),
                                      f,
-                                     field_value);
+                                     value);
   }
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
-      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetBoolean<transaction_active>(obj, value.GetZ());
       break;
     case Primitive::kPrimByte:
-      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetByte<transaction_active>(obj, value.GetB());
       break;
     case Primitive::kPrimChar:
-      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetChar<transaction_active>(obj, value.GetC());
       break;
     case Primitive::kPrimShort:
-      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetShort<transaction_active>(obj, value.GetS());
       break;
     case Primitive::kPrimInt:
-      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetInt<transaction_active>(obj, value.GetI());
       break;
     case Primitive::kPrimLong:
-      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
+      f->SetLong<transaction_active>(obj, value.GetJ());
       break;
     case Primitive::kPrimNot: {
-      ObjPtr<mirror::Object> reg = shadow_frame.GetVRegReference(vregA);
+      ObjPtr<mirror::Object> reg = value.GetL();
       if (do_assignability_check && reg != nullptr) {
         // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
         // object in the destructor.
@@ -390,7 +427,7 @@
         if (!reg->VerifierInstanceOf(field_class.Ptr())) {
           // This should never happen.
           std::string temp1, temp2, temp3;
-          self->ThrowNewExceptionF("Ljava/lang/VirtualMachineError;",
+          self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                    "Put '%s' that is not instance of field '%s' in '%s'",
                                    reg->GetClass()->GetDescriptor(&temp1),
                                    field_class->GetDescriptor(&temp2),
@@ -434,11 +471,12 @@
   }
 
   uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  JValue value = GetFieldValue<field_type>(shadow_frame, vregA);
   return DoFieldPutCommon<field_type, do_assignability_check, transaction_active>(self,
                                                                                   shadow_frame,
                                                                                   obj,
                                                                                   f,
-                                                                                  vregA);
+                                                                                  value);
 }
 
 // Explicitly instantiate all DoFieldPut functions.
@@ -479,37 +517,34 @@
                                     ObjPtr<mirror::Object>& obj,
                                     ArtField* field,
                                     Primitive::Type field_type,
-                                    size_t vregA) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                    const JValue& value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   static const bool kDoCheckAssignability = false;
   static const bool kTransaction = false;
   switch (field_type) {
     case Primitive::kPrimBoolean:
       return DoFieldPutCommon<Primitive::kPrimBoolean, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimByte:
       return DoFieldPutCommon<Primitive::kPrimByte, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimChar:
       return DoFieldPutCommon<Primitive::kPrimChar, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimShort:
       return DoFieldPutCommon<Primitive::kPrimShort, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimInt:
-      return DoFieldPutCommon<Primitive::kPrimInt, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
-    case Primitive::kPrimLong:
-      return DoFieldPutCommon<Primitive::kPrimLong, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
     case Primitive::kPrimFloat:
       return DoFieldPutCommon<Primitive::kPrimInt, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimLong:
     case Primitive::kPrimDouble:
       return DoFieldPutCommon<Primitive::kPrimLong, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimNot:
       return DoFieldPutCommon<Primitive::kPrimNot, kDoCheckAssignability, kTransaction>(
-          self, shadow_frame, obj, field, vregA);
+          self, shadow_frame, obj, field, value);
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable: " << field_type;
       UNREACHABLE();
@@ -653,53 +688,51 @@
 //
 
 template <bool is_range, bool do_assignability_check>
-    REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallCommon(ArtMethod* called_method,
-                                Thread* self,
-                                ShadowFrame& shadow_frame,
-                                JValue* result,
-                                uint16_t number_of_inputs,
-                                uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                uint32_t vregC) ALWAYS_INLINE;
-
-template <bool is_range> REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallPolymorphic(ArtMethod* called_method,
-                                     Handle<mirror::MethodType> callsite_type,
-                                     Handle<mirror::MethodType> target_type,
-                                     Thread* self,
-                                     ShadowFrame& shadow_frame,
-                                     JValue* result,
-                                     uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                     uint32_t vregC,
-                                     const MethodHandleKind handle_kind) ALWAYS_INLINE;
-
-template <bool is_range> REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallTransform(ArtMethod* called_method,
-                                   Handle<mirror::MethodType> callsite_type,
-                                   Handle<mirror::MethodType> callee_type,
-                                   Thread* self,
-                                   ShadowFrame& shadow_frame,
-                                   Handle<mirror::MethodHandleImpl> receiver,
-                                   JValue* result,
-                                   uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                   uint32_t vregC) ALWAYS_INLINE;
-
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline void PerformCall(Thread* self,
-                        const DexFile::CodeItem* code_item,
-                        ArtMethod* caller_method,
-                        const size_t first_dest_reg,
-                        ShadowFrame* callee_frame,
-                        JValue* result) ALWAYS_INLINE;
+static ALWAYS_INLINE bool DoCallCommon(ArtMethod* called_method,
+                                       Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       JValue* result,
+                                       uint16_t number_of_inputs,
+                                       uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                       uint32_t vregC) REQUIRES_SHARED(Locks::mutator_lock_);
 
 template <bool is_range>
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline void CopyRegisters(ShadowFrame& caller_frame,
-                          ShadowFrame* callee_frame,
-                          const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                          const size_t first_src_reg,
-                          const size_t first_dest_reg,
-                          const size_t num_regs) ALWAYS_INLINE;
+static ALWAYS_INLINE bool DoCallPolymorphic(ArtMethod* called_method,
+                                            Handle<mirror::MethodType> callsite_type,
+                                            Handle<mirror::MethodType> target_type,
+                                            Thread* self,
+                                            ShadowFrame& shadow_frame,
+                                            JValue* result,
+                                            uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                            uint32_t vregC,
+                                            const MethodHandleKind handle_kind)
+  REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+static ALWAYS_INLINE bool DoCallTransform(ArtMethod* called_method,
+                                          Handle<mirror::MethodType> callsite_type,
+                                          Handle<mirror::MethodType> callee_type,
+                                          Thread* self,
+                                          ShadowFrame& shadow_frame,
+                                          Handle<mirror::MethodHandleImpl> receiver,
+                                          JValue* result,
+                                          uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                          uint32_t vregC) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ALWAYS_INLINE void PerformCall(Thread* self,
+                               const DexFile::CodeItem* code_item,
+                               ArtMethod* caller_method,
+                               const size_t first_dest_reg,
+                               ShadowFrame* callee_frame,
+                               JValue* result) REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+ALWAYS_INLINE void CopyRegisters(ShadowFrame& caller_frame,
+                                 ShadowFrame* callee_frame,
+                                 const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                 const size_t first_src_reg,
+                                 const size_t first_dest_reg,
+                                 const size_t num_regs) REQUIRES_SHARED(Locks::mutator_lock_);
 
 // END DECLARATIONS.
 
@@ -776,16 +809,54 @@
   return is_invoke_exact;
 }
 
+inline static ObjPtr<mirror::Class> GetAndInitializeDeclaringClass(Thread* self, ArtField* field)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Returns |field|'s declaring class, initializing it first if necessary, or nullptr (with an
+  // exception expected to be pending) if initialization fails. Method handle invocations on
+  // static fields must ensure the class is initialized: that usually happens when an instance
+  // is constructed or class members are referenced, but is not guaranteed for method handles.
+  ObjPtr<mirror::Class> klass = field->GetDeclaringClass();
+  if (UNLIKELY(!klass->IsInitialized())) {
+    StackHandleScope<1> hs(self);
+    HandleWrapperObjPtr<mirror::Class> h(hs.NewHandleWrapper(&klass));
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Initialization failed; caller must check for nullptr.
+    }
+  }
+  return klass;
+}
+
+// Returns true iff the callsite type of a polymorphic invoke is transformer-like,
+// i.e. it has exactly one parameter type and that type is
+// dalvik.system.EmulatedStackFrame. Any other parameter count yields false.
+static inline bool IsCallerTransformer(Handle<mirror::MethodType> callsite_type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::ObjectArray<mirror::Class>> param_types(callsite_type->GetPTypes());
+  if (param_types->GetLength() == 1) {
+    ObjPtr<mirror::Class> param(param_types->GetWithoutChecks(0));
+    return param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame);
+  }
+
+  return false;  // Zero or several parameters: not a transformer call site.
+}
+
 template<bool is_range, bool do_access_check>
 inline bool DoInvokePolymorphic(Thread* self,
                                 ShadowFrame& shadow_frame,
                                 const Instruction* inst,
                                 uint16_t inst_data,
-                                JValue* result) REQUIRES_SHARED(Locks::mutator_lock_) {
+                                JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   // Invoke-polymorphic instructions always take a receiver. i.e, they are never static.
   const uint32_t vRegC = (is_range) ? inst->VRegC_4rcc() : inst->VRegC_45cc();
   const int invoke_method_idx = (is_range) ? inst->VRegB_4rcc() : inst->VRegB_45cc();
 
+  // Initialize |result| to 0 as this is the default return value for
+  // polymorphic invocations of method handle types with void return
+  // and provides sane return result in error cases.
+  result->SetJ(0);
+
   // Determine if this invocation is MethodHandle.invoke() or
   // MethodHandle.invokeExact().
   bool is_invoke_exact = IsInvokeExact(shadow_frame.GetMethod()->GetDeclaringClass()->GetDexFile(),
@@ -794,11 +865,6 @@
   // The invoke_method_idx here is the name of the signature polymorphic method that
   // was symbolically invoked in bytecode (say MethodHandle.invoke or MethodHandle.invokeExact)
   // and not the method that we'll dispatch to in the end.
-  //
-  // TODO(narayan) We'll have to check in the verifier that this is in fact a
-  // signature polymorphic method so that we disallow calls via invoke-polymorphic
-  // to non sig-poly methods. This would also have the side effect of verifying
-  // that vRegC really is a reference type.
   StackHandleScope<6> hs(self);
   Handle<mirror::MethodHandleImpl> method_handle(hs.NewHandle(
       ObjPtr<mirror::MethodHandleImpl>::DownCast(
@@ -807,7 +873,6 @@
     // Note that the invoke type is kVirtual here because a call to a signature
     // polymorphic method is shaped like a virtual call at the bytecode level.
     ThrowNullPointerExceptionForMethodAccess(invoke_method_idx, InvokeType::kVirtual);
-    result->SetJ(0);
     return false;
   }
 
@@ -828,16 +893,37 @@
   // This implies we couldn't resolve one or more types in this method handle.
   if (UNLIKELY(callsite_type.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    result->SetJ(0);
     return false;
   }
 
   const MethodHandleKind handle_kind = method_handle->GetHandleKind();
   Handle<mirror::MethodType> handle_type(hs.NewHandle(method_handle->GetMethodType()));
   CHECK(handle_type.Get() != nullptr);
-  if (UNLIKELY(is_invoke_exact && !callsite_type->IsExactMatch(handle_type.Get()))) {
-    ThrowWrongMethodTypeException(callsite_type.Get(), handle_type.Get());
-    return false;
+  {
+    // We need to check the nominal type of the handle in addition to the
+    // real type. The "nominal" type is present when MethodHandle.asType is
+    // called on any handle, and results in the declared type of the handle
+    // changing.
+    ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
+    ObjPtr<mirror::MethodType> check_type(nullptr);
+    if (LIKELY(nominal_type.Ptr() == nullptr)) {
+      check_type.Assign(handle_type.Get());
+    } else {
+      check_type.Assign(nominal_type.Ptr());
+    }
+
+    if (is_invoke_exact) {
+      if (UNLIKELY(!callsite_type->IsExactMatch(check_type.Ptr()))) {
+        ThrowWrongMethodTypeException(check_type.Ptr(), callsite_type.Get());
+        return false;
+      }
+    } else if (!IsInvokeTransform(handle_kind)) {
+      if (UNLIKELY(!IsCallerTransformer(callsite_type) &&
+                   !callsite_type->IsConvertible(check_type.Ptr()))) {
+        ThrowWrongMethodTypeException(check_type.Ptr(), callsite_type.Get());
+        return false;
+      }
+    }
   }
 
   uint32_t arg[Instruction::kMaxVarArgRegs] = {};
@@ -866,18 +952,14 @@
       // TODO: Unfortunately, we have to postpone dynamic receiver based checks
       // because the receiver might be cast or might come from an emulated stack
       // frame, which means that it is unknown at this point. We perform these
-      // checks inside DoCallPolymorphic right before we do the actualy invoke.
+      // checks inside DoCallPolymorphic right before we do the actual invoke.
     } else if (handle_kind == kInvokeDirect) {
-      if (called_method->IsConstructor()) {
-        // TODO(narayan) : We need to handle the case where the target method is a
-        // constructor here.
-        UNIMPLEMENTED(FATAL) << "Direct invokes for constructors are not implemented yet.";
-        return false;
+      // String constructors are a special case, they are replaced with StringFactory
+      // methods.
+      if (called_method->IsConstructor() && called_method->GetDeclaringClass()->IsStringClass()) {
+        DCHECK(handle_type->GetRType()->IsStringClass());
+        called_method = WellKnownClasses::StringInitToStringFactory(called_method);
       }
-
-      // Nothing special to do in the case where we're not dealing with a
-      // constructor. It's a private method, and we've already access checked at
-      // the point of creating the handle.
     } else if (handle_kind == kInvokeSuper) {
       ObjPtr<mirror::Class> declaring_class = called_method->GetDeclaringClass();
 
@@ -903,10 +985,20 @@
       CHECK(called_method != nullptr);
     }
 
-    if (handle_kind == kInvokeTransform) {
+    if (IsInvokeTransform(handle_kind)) {
+      // There are two cases here - method handles representing regular
+      // transforms and those representing call site transforms. Method
+      // handles for call site transforms adapt their MethodType to match
+      // the call site. For these, the |callee_type| is the same as the
+      // |callsite_type|. The VarargsCollector is such a transform, its
+      // method type depends on the call site, ie. x(a) or x(a, b), or
+      // x(a, b, c). The VarargsCollector invokes a variable arity method
+      // with the arity arguments in an array.
+      Handle<mirror::MethodType> callee_type =
+          (handle_kind == kInvokeCallSiteTransform) ? callsite_type : handle_type;
       return DoCallTransform<is_range>(called_method,
                                        callsite_type,
-                                       handle_type,
+                                       callee_type,
                                        self,
                                        shadow_frame,
                                        method_handle /* receiver */,
@@ -927,31 +1019,48 @@
   } else {
     DCHECK(!is_range);
     ArtField* field = method_handle->GetTargetField();
-    Primitive::Type field_type = field->GetTypeAsPrimitiveType();;
-
-    if (!is_invoke_exact) {
-      // TODO(oth): conversion plumbing for invoke().
-      UNIMPLEMENTED(FATAL);
-    }
+    Primitive::Type field_type = field->GetTypeAsPrimitiveType();
 
     switch (handle_kind) {
       case kInstanceGet: {
         ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(first_src_reg);
         DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+        if (!ConvertReturnValue(callsite_type, handle_type, result)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        return true;
+      }
+      case kStaticGet: {
+        ObjPtr<mirror::Object> obj = GetAndInitializeDeclaringClass(self, field);
+        if (obj == nullptr) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+        if (!ConvertReturnValue(callsite_type, handle_type, result)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
         return true;
       }
       case kInstancePut: {
+        JValue value = GetFieldValue(shadow_frame, field_type, arg[1]);
+        if (!ConvertArgumentValue(callsite_type, handle_type, 1, &value)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
         ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(first_src_reg);
-        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, arg[1]);
-      }
-      case kStaticGet: {
-        ObjPtr<mirror::Object> obj = field->GetDeclaringClass();
-        DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
-        return true;
+        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
       }
       case kStaticPut: {
+        JValue value = GetFieldValue(shadow_frame, field_type, arg[0]);
+        if (!ConvertArgumentValue(callsite_type, handle_type, 0, &value)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
         ObjPtr<mirror::Object> obj = field->GetDeclaringClass();
-        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, arg[0]);
+        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
       }
       default:
         LOG(FATAL) << "Unreachable: " << handle_kind;
@@ -987,7 +1096,6 @@
   return num_ins;
 }
 
-
 inline void PerformCall(Thread* self,
                         const DexFile::CodeItem* code_item,
                         ArtMethod* caller_method,
@@ -1031,20 +1139,6 @@
   }
 }
 
-// Returns true iff. the callsite type for a polymorphic invoke is transformer
-// like, i.e that it has a single input argument whose type is
-// dalvik.system.EmulatedStackFrame.
-static inline bool IsCallerTransformer(Handle<mirror::MethodType> callsite_type)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::ObjectArray<mirror::Class>> param_types(callsite_type->GetPTypes());
-  if (param_types->GetLength() == 1) {
-    ObjPtr<mirror::Class> param(param_types->GetWithoutChecks(0));
-    return param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame);
-  }
-
-  return false;
-}
-
 template <bool is_range>
 static inline bool DoCallPolymorphic(ArtMethod* called_method,
                                      Handle<mirror::MethodType> callsite_type,
@@ -1055,8 +1149,6 @@
                                      uint32_t (&arg)[Instruction::kMaxVarArgRegs],
                                      uint32_t first_src_reg,
                                      const MethodHandleKind handle_kind) {
-  // TODO(narayan): Wire in the String.init hacks.
-
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
 
@@ -1157,20 +1249,31 @@
   }
 
   PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
-
-  // TODO(narayan): Perform return value conversions.
+  if (self->IsExceptionPending()) {
+    return false;
+  }
 
   // If the caller of this signature polymorphic method was a transformer,
   // we need to copy the result back out to the emulated stack frame.
-  if (is_caller_transformer && !self->IsExceptionPending()) {
-    ObjPtr<mirror::EmulatedStackFrame> emulated_stack_frame(
-        reinterpret_cast<mirror::EmulatedStackFrame*>(
-            shadow_frame.GetVRegReference(first_src_reg)));
+  if (is_caller_transformer) {
+    StackHandleScope<2> hs(self);
+    Handle<mirror::EmulatedStackFrame> emulated_stack_frame(
+        hs.NewHandle(reinterpret_cast<mirror::EmulatedStackFrame*>(
+            shadow_frame.GetVRegReference(first_src_reg))));
+    Handle<mirror::MethodType> emulated_stack_type(hs.NewHandle(emulated_stack_frame->GetType()));
+    JValue local_result;
+    local_result.SetJ(result->GetJ());
 
-    emulated_stack_frame->SetReturnValue(self, *result);
+    if (ConvertReturnValue(emulated_stack_type, target_type, &local_result)) {
+      emulated_stack_frame->SetReturnValue(self, local_result);
+      return true;
+    } else {
+      DCHECK(self->IsExceptionPending());
+      return false;
+    }
+  } else {
+    return ConvertReturnValue(callsite_type, target_type, result);
   }
-
-  return !self->IsExceptionPending();
 }
 
 template <bool is_range>
@@ -1237,14 +1340,14 @@
               0 /* first dest reg */,
               new_shadow_frame,
               result);
+  if (self->IsExceptionPending()) {
+    return false;
+  }
 
   // If the called transformer method we called has returned a value, then we
   // need to copy it back to |result|.
-  if (!self->IsExceptionPending()) {
-    sf->GetReturnValue(self, result);
-  }
-
-  return !self->IsExceptionPending();
+  sf->GetReturnValue(self, result);
+  return ConvertReturnValue(callsite_type, callee_type, result);
 }
 
 template <bool is_range,
@@ -1368,7 +1471,7 @@
           ObjPtr<mirror::Object> o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != nullptr) {
             PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-            const uint32_t type_idx = params->GetTypeItem(shorty_pos).type_idx_;
+            const dex::TypeIndex type_idx = params->GetTypeItem(shorty_pos).type_idx_;
             ObjPtr<mirror::Class> arg_type = method->GetDexCacheResolvedType(type_idx,
                                                                              pointer_size);
             if (arg_type == nullptr) {
@@ -1385,7 +1488,7 @@
             if (!o->VerifierInstanceOf(arg_type)) {
               // This should never happen.
               std::string temp1, temp2;
-              self->ThrowNewExceptionF("Ljava/lang/VirtualMachineError;",
+              self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                        "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
                                        new_shadow_frame->GetMethod()->GetName(), shorty_pos,
                                        o->GetClass()->GetDescriptor(&temp1),
@@ -1476,7 +1579,7 @@
     return false;
   }
   uint16_t type_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  ObjPtr<mirror::Class> array_class = ResolveVerifyAndClinit(type_idx,
+  ObjPtr<mirror::Class> array_class = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
                                                              shadow_frame.GetMethod(),
                                                              self,
                                                              false,
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 9c26d24..c9a5b44 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -236,7 +236,7 @@
 // java.lang.String class is initialized.
 static inline ObjPtr<mirror::String> ResolveString(Thread* self,
                                                    ShadowFrame& shadow_frame,
-                                                   uint32_t string_idx)
+                                                   dex::StringIndex string_idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ObjPtr<mirror::Class> java_lang_string_class = mirror::String::GetJavaLangString();
   if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
@@ -251,11 +251,11 @@
   ArtMethod* method = shadow_frame.GetMethod();
   ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx % mirror::DexCache::kDexCacheStringCacheSize,
+  DCHECK_LT(string_idx.index_ % mirror::DexCache::kDexCacheStringCacheSize,
             declaring_class->GetDexFile().NumStringIds());
   ObjPtr<mirror::String> string_ptr =
       mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
-                                         string_idx,
+                                         string_idx.index_,
                                          mirror::DexCache::kDexCacheStringCacheSize).Read();
   if (UNLIKELY(string_ptr == nullptr)) {
     StackHandleScope<1> hs(self);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 435ac62..52eacd5 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -287,7 +287,7 @@
           if (!obj_result->VerifierInstanceOf(return_type)) {
             // This should never happen.
             std::string temp1, temp2;
-            self->ThrowNewExceptionF("Ljava/lang/VirtualMachineError;",
+            self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                      "Returning '%s' that is not instance of return type '%s'",
                                      obj_result->GetClass()->GetDescriptor(&temp1),
                                      return_type->GetDescriptor(&temp2));
@@ -373,7 +373,9 @@
         break;
       case Instruction::CONST_STRING: {
         PREAMBLE();
-        ObjPtr<mirror::String> s = ResolveString(self, shadow_frame,  inst->VRegB_21c());
+        ObjPtr<mirror::String> s = ResolveString(self,
+                                                 shadow_frame,
+                                                 dex::StringIndex(inst->VRegB_21c()));
         if (UNLIKELY(s == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
@@ -384,7 +386,9 @@
       }
       case Instruction::CONST_STRING_JUMBO: {
         PREAMBLE();
-        ObjPtr<mirror::String> s = ResolveString(self, shadow_frame,  inst->VRegB_31c());
+        ObjPtr<mirror::String> s = ResolveString(self,
+                                                 shadow_frame,
+                                                 dex::StringIndex(inst->VRegB_31c()));
         if (UNLIKELY(s == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
@@ -395,7 +399,7 @@
       }
       case Instruction::CONST_CLASS: {
         PREAMBLE();
-        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegB_21c()),
                                                          shadow_frame.GetMethod(),
                                                          self,
                                                          false,
@@ -434,7 +438,7 @@
       }
       case Instruction::CHECK_CAST: {
         PREAMBLE();
-        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegB_21c()),
                                                          shadow_frame.GetMethod(),
                                                          self,
                                                          false,
@@ -454,7 +458,7 @@
       }
       case Instruction::INSTANCE_OF: {
         PREAMBLE();
-        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegC_22c(),
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegC_22c()),
                                                          shadow_frame.GetMethod(),
                                                          self,
                                                          false,
@@ -484,7 +488,7 @@
       case Instruction::NEW_INSTANCE: {
         PREAMBLE();
         ObjPtr<mirror::Object> obj = nullptr;
-        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegB_21c()),
                                                          shadow_frame.GetMethod(),
                                                          self,
                                                          false,
@@ -495,8 +499,10 @@
             obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
           } else {
             obj = AllocObjectFromCode<do_access_check, true>(
-              inst->VRegB_21c(), shadow_frame.GetMethod(), self,
-              Runtime::Current()->GetHeap()->GetCurrentAllocator());
+                dex::TypeIndex(inst->VRegB_21c()),
+                shadow_frame.GetMethod(),
+                self,
+                Runtime::Current()->GetHeap()->GetCurrentAllocator());
           }
         }
         if (UNLIKELY(obj == nullptr)) {
@@ -520,7 +526,10 @@
         PREAMBLE();
         int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
         ObjPtr<mirror::Object> obj = AllocArrayFromCode<do_access_check, true>(
-            inst->VRegC_22c(), length, shadow_frame.GetMethod(), self,
+            dex::TypeIndex(inst->VRegC_22c()),
+            length,
+            shadow_frame.GetMethod(),
+            self,
             Runtime::Current()->GetHeap()->GetCurrentAllocator());
         if (UNLIKELY(obj == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
@@ -572,7 +581,7 @@
         } else if (do_assignability_check && !exception->GetClass()->IsThrowableClass()) {
           // This should never happen.
           std::string temp;
-          self->ThrowNewExceptionF("Ljava/lang/VirtualMachineError;",
+          self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                    "Throwing '%s' that is not instance of Throwable",
                                    exception->GetClass()->GetDescriptor(&temp));
         } else {
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 62e573a..cd32ea2 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -156,7 +156,7 @@
     REFRESH_IBASE
     add     r2, rINST, rINST            @ r2<- byte offset
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_return.S b/runtime/interpreter/mterp/arm/op_return.S
index 1888373..f9c0f0f 100644
--- a/runtime/interpreter/mterp/arm/op_return.S
+++ b/runtime/interpreter/mterp/arm/op_return.S
@@ -8,7 +8,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
diff --git a/runtime/interpreter/mterp/arm/op_return_void.S b/runtime/interpreter/mterp/arm/op_return_void.S
index cbea2bf..a91ccb3 100644
--- a/runtime/interpreter/mterp/arm/op_return_void.S
+++ b/runtime/interpreter/mterp/arm/op_return_void.S
@@ -2,7 +2,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
diff --git a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
index 2dde7ae..b953f4c 100644
--- a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
index ceae878..df582c0 100644
--- a/runtime/interpreter/mterp/arm/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -6,7 +6,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index 9fbbbd3..441c1a1 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -31,11 +31,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
+    SAVE_TWO_REGS_INCREASE_FRAME xPROFILE, x27, 80
+    SAVE_TWO_REGS                xIBASE, xREFS, 16
+    SAVE_TWO_REGS                xSELF, xINST, 32
+    SAVE_TWO_REGS                xPC, xFP, 48
+    SAVE_TWO_REGS                fp, lr, 64
     add     fp, sp, #64
 
     /* Remember the return register */
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 7628ed3..6ffbd3f 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -141,7 +141,7 @@
     add     w2, wINST, wINST            // w2<- byte offset
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L_suspend_request_pending
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -215,7 +215,7 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    check1
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -270,7 +270,7 @@
     ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
     str     x0, [x2]
     mov     x0, xSELF
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.eq    check2
     bl      MterpSuspendCheck                       // (self)
 check2:
@@ -285,12 +285,15 @@
  */
     cmp     wPROFILE, #0
     bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    .cfi_remember_state
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
+    .cfi_restore_state                              // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 80                          // workaround for clang bug: 31975598
 
 MterpProfileActive:
     mov     xINST, x0                               // stash return value
@@ -301,11 +304,11 @@
     strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
     bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
     mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index c791eb5..7125d5a 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -292,3 +292,41 @@
 .macro REFRESH_IBASE
   ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 .endm
+
+/*
+ * Save reg1/reg2 at [sp + offset] without moving sp; CFI records reg1 at offset and reg2 at offset + 8.
+ */
+.macro SAVE_TWO_REGS reg1, reg2, offset
+    stp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_rel_offset \reg1, (\offset)
+    .cfi_rel_offset \reg2, (\offset) + 8
+.endm
+
+/*
+ * Reload reg1/reg2 from [sp + offset] without moving sp and mark both as restored in the CFI.
+ */
+.macro RESTORE_TWO_REGS reg1, reg2, offset
+    ldp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+/*
+ * Pre-decrement sp by frame_adjustment, store reg1/reg2 at the new sp, and grow the CFA offset to match.
+ */
+.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
+    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+/*
+ * Reload reg1/reg2 from [sp], post-increment sp by frame_adjustment, and shrink the CFA offset to match.
+ */
+.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
+    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
diff --git a/runtime/interpreter/mterp/arm64/op_return.S b/runtime/interpreter/mterp/arm64/op_return.S
index 28630ee..9f125c7 100644
--- a/runtime/interpreter/mterp/arm64/op_return.S
+++ b/runtime/interpreter/mterp/arm64/op_return.S
@@ -8,7 +8,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     lsr     w2, wINST, #8               // r2<- AA
diff --git a/runtime/interpreter/mterp/arm64/op_return_void.S b/runtime/interpreter/mterp/arm64/op_return_void.S
index 3a5aa56..b253006 100644
--- a/runtime/interpreter/mterp/arm64/op_return_void.S
+++ b/runtime/interpreter/mterp/arm64/op_return_void.S
@@ -2,7 +2,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     mov     x0, #0
diff --git a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
index 1e06953..c817169 100644
--- a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     mov     x0, #0
diff --git a/runtime/interpreter/mterp/arm64/op_return_wide.S b/runtime/interpreter/mterp/arm64/op_return_wide.S
index c6e1d9d..c47661c 100644
--- a/runtime/interpreter/mterp/arm64/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_return_wide.S
@@ -7,7 +7,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     lsr     w2, wINST, #8               // w2<- AA
diff --git a/runtime/interpreter/mterp/mips/binop.S b/runtime/interpreter/mterp/mips/binop.S
index 66627e2..862d95a 100644
--- a/runtime/interpreter/mterp/mips/binop.S
+++ b/runtime/interpreter/mterp/mips/binop.S
@@ -30,4 +30,3 @@
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/mips/binop2addr.S b/runtime/interpreter/mterp/mips/binop2addr.S
index 548cbcb..17aa8eb 100644
--- a/runtime/interpreter/mterp/mips/binop2addr.S
+++ b/runtime/interpreter/mterp/mips/binop2addr.S
@@ -25,5 +25,4 @@
     $preinstr                              #  optional op
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-13 instructions */
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
diff --git a/runtime/interpreter/mterp/mips/binopLit16.S b/runtime/interpreter/mterp/mips/binopLit16.S
index fc0c9ff..0696e7a 100644
--- a/runtime/interpreter/mterp/mips/binopLit16.S
+++ b/runtime/interpreter/mterp/mips/binopLit16.S
@@ -11,12 +11,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if $chkzero
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -26,5 +25,4 @@
     $preinstr                              #  optional op
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-13 instructions */
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
diff --git a/runtime/interpreter/mterp/mips/binopLit8.S b/runtime/interpreter/mterp/mips/binopLit8.S
index a591408..382dd2b 100644
--- a/runtime/interpreter/mterp/mips/binopLit8.S
+++ b/runtime/interpreter/mterp/mips/binopLit8.S
@@ -12,7 +12,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -28,4 +28,3 @@
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide.S b/runtime/interpreter/mterp/mips/binopWide.S
index 608525b..604134d 100644
--- a/runtime/interpreter/mterp/mips/binopWide.S
+++ b/runtime/interpreter/mterp/mips/binopWide.S
@@ -3,10 +3,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -32,4 +32,3 @@
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vAA/vAA+1 <- $result0/$result1
-    /* 14-17 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide2addr.S b/runtime/interpreter/mterp/mips/binopWide2addr.S
index cc92149..f96fdb2 100644
--- a/runtime/interpreter/mterp/mips/binopWide2addr.S
+++ b/runtime/interpreter/mterp/mips/binopWide2addr.S
@@ -3,22 +3,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vA/vA+1
     .if $chkzero
     or        t0, $arg2, $arg3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -28,6 +27,4 @@
     $preinstr                              #  optional op
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vAA/vAA+1 <- $result0/$result1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- $result0/$result1
diff --git a/runtime/interpreter/mterp/mips/fbinop.S b/runtime/interpreter/mterp/mips/fbinop.S
index d0d39ae..6c1468c 100644
--- a/runtime/interpreter/mterp/mips/fbinop.S
+++ b/runtime/interpreter/mterp/mips/fbinop.S
@@ -6,7 +6,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -14,6 +14,5 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     $instr                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
diff --git a/runtime/interpreter/mterp/mips/fbinop2addr.S b/runtime/interpreter/mterp/mips/fbinop2addr.S
index ccb67b1..2caaf9c 100644
--- a/runtime/interpreter/mterp/mips/fbinop2addr.S
+++ b/runtime/interpreter/mterp/mips/fbinop2addr.S
@@ -1,19 +1,18 @@
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     $instr
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/fbinopWide.S b/runtime/interpreter/mterp/mips/fbinopWide.S
index 3be9325..a1fe91e 100644
--- a/runtime/interpreter/mterp/mips/fbinopWide.S
+++ b/runtime/interpreter/mterp/mips/fbinopWide.S
@@ -1,6 +1,6 @@
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -9,7 +9,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -19,10 +19,5 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     $instr
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .L${opcode}_finish
-%break
-
-.L${opcode}_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/fbinopWide2addr.S b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
index 8541f11..7303441 100644
--- a/runtime/interpreter/mterp/mips/fbinopWide2addr.S
+++ b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
@@ -1,10 +1,11 @@
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -16,6 +17,5 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/footer.S b/runtime/interpreter/mterp/mips/footer.S
index 1363751..9909dfe 100644
--- a/runtime/interpreter/mterp/mips/footer.S
+++ b/runtime/interpreter/mterp/mips/footer.S
@@ -151,7 +151,7 @@
     REFRESH_IBASE()
     addu    a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/funop.S b/runtime/interpreter/mterp/mips/funop.S
index bfb9346..b2b22c9 100644
--- a/runtime/interpreter/mterp/mips/funop.S
+++ b/runtime/interpreter/mterp/mips/funop.S
@@ -1,18 +1,15 @@
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * Generic 32-bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: int-to-float, float-to-int
+     * for: int-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t1)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/funopWide.S b/runtime/interpreter/mterp/mips/funopWide.S
deleted file mode 100644
index 3d4cf22..0000000
--- a/runtime/interpreter/mterp/mips/funopWide.S
+++ /dev/null
@@ -1,22 +0,0 @@
-%default {"preinstr":"", "ld_arg":"LOAD64_F(fa0, fa0f, a3)", "st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
-    /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
-     *
-     * long-to-double, double-to-long
-     */
-    /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
-    GET_OPB(a3)                            #  a3 <- B
-    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    $ld_arg
-    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    $preinstr                              #  optional op
-    $instr                                 #  a0/a1 <- op, a2-a3 changed
-
-.L${opcode}_set_vreg:
-    $st_result                             #  vAA <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/funopWider.S b/runtime/interpreter/mterp/mips/funopWider.S
index efb85f3..6862e24 100644
--- a/runtime/interpreter/mterp/mips/funopWider.S
+++ b/runtime/interpreter/mterp/mips/funopWider.S
@@ -1,10 +1,8 @@
-%default {"st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -12,8 +10,5 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg:
-    $st_result                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
index a3a6744..0ce7745 100644
--- a/runtime/interpreter/mterp/mips/header.S
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -153,6 +153,58 @@
 #define fcc1   $$fcc1
 #endif
 
+#ifdef MIPS32REVGE2  /* single-instruction seb/seh/ins forms */
+#define SEB(rd, rt) \
+    seb       rd, rt
+#define SEH(rd, rt) \
+    seh       rd, rt
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    ins       rd_lo, rt_hi, 16, 16
+#else  /* !MIPS32REVGE2: shift-based equivalents (sll then sra sign-extends) */
+#define SEB(rd, rt) \
+    sll       rd, rt, 24; \
+    sra       rd, rd, 24
+#define SEH(rd, rt) \
+    sll       rd, rt, 16; \
+    sra       rd, rd, 16
+/* Clobbers rt_hi on pre-R2. The or-based form relies on the high half of rd_lo being zero (NOTE(review): confirm at call sites). */
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    sll       rt_hi, rt_hi, 16; \
+    or        rd_lo, rt_hi
+#endif  /* MIPS32REVGE2 */
+
+#ifdef FPU64  /* mthc1 writes r into the high half of flo; fhi is unused */
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mthc1     r, flo
+#else  /* !FPU64: mtc1 writes r into the separate register fhi; flo is unused */
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mtc1      r, fhi
+#endif  /* FPU64 */
+
+#ifdef MIPS32REVGE6  /* jic and lsa are used directly */
+#define JR(rt) \
+    jic       rt, 0
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    lsa       rd, rs, rt, sa; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#else  /* !MIPS32REVGE6: jalr zero for the jump; sll + addu through AT for rd = (rs << sa) + rt */
+#define JR(rt) \
+    jalr      zero, rt
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    .set      push; \
+    .set      noat; \
+    sll       AT, rs, sa; \
+    addu      rd, AT, rt; \
+    .set      pop; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#endif  /* MIPS32REVGE6 */
+
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
  * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
@@ -186,12 +238,12 @@
     sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
 
 #define EXPORT_DEX_PC(tmp) \
-    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
-    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
-    addu tmp, CODEITEM_INSNS_OFFSET \
-    subu tmp, rPC, tmp \
-    sra  tmp, tmp, 1 \
-    sw   tmp, OFF_FP_DEX_PC(rFP)
+    lw        tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu      tmp, CODEITEM_INSNS_OFFSET; \
+    subu      tmp, rPC, tmp; \
+    sra       tmp, tmp, 1; \
+    sw        tmp, OFF_FP_DEX_PC(rFP)
 
 /*
  * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
@@ -206,18 +258,11 @@
  * exception catch may miss.  (This also implies that it must come after
  * EXPORT_PC().)
  */
-#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+#define FETCH_ADVANCE_INST(_count) \
+    lhu       rINST, ((_count)*2)(rPC); \
     addu      rPC, rPC, ((_count) * 2)
 
 /*
- * The operation performed here is similar to FETCH_ADVANCE_INST, except the
- * src and dest registers are parameterized (not hard-wired to rPC and rINST).
- */
-#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
-    lhu       _dreg, ((_count)*2)(_sreg) ;            \
-    addu      _sreg, _sreg, (_count)*2
-
-/*
  * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
  * rINST ahead of possible exception point.  Be sure to manually advance rPC
  * later.
@@ -232,7 +277,8 @@
  * rPC to point to the next instruction.  "rd" must specify the distance
  * in bytes, *not* 16-bit code units, and may be a signed value.
  */
-#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+#define FETCH_ADVANCE_INST_RB(rd) \
+    addu      rPC, rPC, rd; \
     lhu       rINST, (rPC)
 
 /*
@@ -257,38 +303,75 @@
 #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
 
 /*
- * Put the prefetched instruction's opcode field into the specified register.
+ * Transform opcode into branch target address.
  */
-#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+#define GET_OPCODE_TARGET(rd) \
+    sll       rd, rd, ${handler_size_bits}; \
+    addu      rd, rIBASE, rd
 
 /*
  * Begin executing the opcode in rd.
  */
-#define GOTO_OPCODE(rd) sll rd, rd, ${handler_size_bits}; \
-    addu      rd, rIBASE, rd; \
-    jalr      zero, rd
-
-#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, ${handler_size_bits}; \
-    addu      rd, _base, rd; \
-    jalr      zero, rd
+#define GOTO_OPCODE(rd) \
+    GET_OPCODE_TARGET(rd); \
+    JR(rd)
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
  */
 #define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
 
-#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
-    .set noat; l.s rd, (AT); .set at
+#define GET_VREG_F(rd, rix) \
+    .set noat; \
+    EAS2(AT, rFP, rix); \
+    l.s       rd, (AT); \
+    .set at
 
-#define SET_VREG(rd, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG(rd, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
     sw        zero, 0(t8)
+#endif
 
-#define SET_VREG64(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        rd, 0(t8)
+#else
+#define SET_VREG_OBJECT(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#else
+#define SET_VREG64(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rlo, 0(t8); \
@@ -297,9 +380,39 @@
     .set at; \
     sw        zero, 0(t8); \
     sw        zero, 4(t8)
+#endif
 
-#ifdef FPU64
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_F(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG_F(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#elif defined(FPU64)
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rREFS, AT; \
     sw        zero, 0(t8); \
@@ -310,7 +423,8 @@
     .set at; \
     s.s       rlo, 0(t8)
 #else
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rlo, 0(t8); \
@@ -321,18 +435,21 @@
     sw        zero, 4(t8)
 #endif
 
-#define SET_VREG_OBJECT(rd, rix) .set noat; \
-    sll       AT, rix, 2; \
-    addu      t8, rFP, AT; \
-    sw        rd, 0(t8); \
-    addu      t8, rREFS, AT; \
-    .set at; \
-    sw        rd, 0(t8)
-
 /* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
-    sll       dst, dst, ${handler_size_bits}; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -342,11 +459,51 @@
     jalr      zero, dst; \
     sw        zero, 0(t8); \
     .set reorder
+#endif
+
+/* Combination of the SET_VREG_OBJECT and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#endif
 
 /* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
-    sll       dst, dst, ${handler_size_bits}; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#else
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -358,14 +515,82 @@
     jalr      zero, dst; \
     sw        zero, 4(t8); \
     .set reorder
+#endif
 
-#define SET_VREG_F(rd, rix) .set noat; \
+/* Combination of the SET_VREG_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
-    sw        zero, 0(t8)
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#endif
+
+/* Combination of the SET_VREG64_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#elif defined(FPU64)
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    jalr      zero, dst; \
+    s.s       rlo, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#endif
 
 #define GET_OPA(rd) srl rd, rINST, 8
 #ifdef MIPS32REVGE2
@@ -376,60 +601,60 @@
 #define GET_OPB(rd) srl rd, rINST, 12
 
 /*
- * Form an Effective Address rd = rbase + roff<<n;
- * Uses reg AT
+ * Form an Effective Address rd = rbase + (roff << shift);
+ * Uses reg AT on pre-R6.
  */
-#define EASN(rd, rbase, roff, rshift) .set noat; \
-    sll       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
-    .set at
+#define EASN(rd, rbase, roff, shift) LSA(rd, roff, rbase, shift)
 
 #define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
 #define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
 #define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
 #define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
 
-/*
- * Form an Effective Shift Right rd = rbase + roff>>n;
- * Uses reg AT
- */
-#define ESRN(rd, rbase, roff, rshift) .set noat; \
-    srl       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
+#define LOAD_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    lw        rd, 0(AT); \
     .set at
 
-#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; lw rd, 0(AT); .set at
-
-#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; sw rd, 0(AT); .set at
+#define STORE_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    sw        rd, 0(AT); \
+    .set at
 
 #define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
 #define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
 
-#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+#define STORE64_off(rlo, rhi, rbase, off) \
+    sw        rlo, off(rbase); \
     sw        rhi, (off+4)(rbase)
-#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+#define LOAD64_off(rlo, rhi, rbase, off) \
+    lw        rlo, off(rbase); \
     lw        rhi, (off+4)(rbase)
 
 #define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
 #define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
 
 #ifdef FPU64
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     .set noat; \
     mfhc1     AT, rlo; \
     sw        AT, (off+4)(rbase); \
     .set at
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     .set noat; \
     lw        AT, (off+4)(rbase); \
     mthc1     AT, rlo; \
     .set at
 #else
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     s.s       rhi, (off+4)(rbase)
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     l.s       rhi, (off+4)(rbase)
 #endif
 
@@ -490,3 +715,11 @@
 
 #define REFRESH_IBASE() \
     lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN                 0x80000000
+#define INT_MIN_AS_FLOAT        0xCF000000
+#define INT_MIN_AS_DOUBLE_HIGH  0xC1E00000
+#define LONG_MIN_HIGH           0x80000000
+#define LONG_MIN_AS_FLOAT       0xDF000000
+#define LONG_MIN_AS_DOUBLE_HIGH 0xC3E00000
diff --git a/runtime/interpreter/mterp/mips/invoke.S b/runtime/interpreter/mterp/mips/invoke.S
index bcd3a57..db3b8af 100644
--- a/runtime/interpreter/mterp/mips/invoke.S
+++ b/runtime/interpreter/mterp/mips/invoke.S
@@ -2,8 +2,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern $helper
     EXPORT_PC()
     move    a0, rSELF
diff --git a/runtime/interpreter/mterp/mips/op_aget.S b/runtime/interpreter/mterp/mips/op_aget.S
index 8aa8992..e88402c 100644
--- a/runtime/interpreter/mterp/mips/op_aget.S
+++ b/runtime/interpreter/mterp/mips/op_aget.S
@@ -19,11 +19,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if $shift
     EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_aget_object.S b/runtime/interpreter/mterp/mips/op_aget_object.S
index e3ab9d8..9c49dfe 100644
--- a/runtime/interpreter/mterp/mips/op_aget_object.S
+++ b/runtime/interpreter/mterp/mips/op_aget_object.S
@@ -14,7 +14,6 @@
     lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
     PREFETCH_INST(2)                       #  load rINST
     bnez a1, MterpException
-    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, rOBJ, t0)     #  vAA <- v0
diff --git a/runtime/interpreter/mterp/mips/op_aput.S b/runtime/interpreter/mterp/mips/op_aput.S
index 53d6ae0..46dcaee 100644
--- a/runtime/interpreter/mterp/mips/op_aput.S
+++ b/runtime/interpreter/mterp/mips/op_aput.S
@@ -17,14 +17,11 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if $shift
     EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     $store a2, $data_offset(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_aput_wide.S b/runtime/interpreter/mterp/mips/op_aput_wide.S
index ef99261..c3cff56 100644
--- a/runtime/interpreter/mterp/mips/op_aput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_aput_wide.S
@@ -1,7 +1,5 @@
     /*
      * Array put, 64 bits.  vBB[vCC] <- vAA.
-     *
-     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
      */
     /* aput-wide vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
@@ -21,5 +19,6 @@
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_array_length.S b/runtime/interpreter/mterp/mips/op_array_length.S
index 2b4a86f..ae2fe68 100644
--- a/runtime/interpreter/mterp/mips/op_array_length.S
+++ b/runtime/interpreter/mterp/mips/op_array_length.S
@@ -1,6 +1,7 @@
     /*
      * Return the length of an array.
      */
+    /* array-length vA, vB */
     GET_OPB(a1)                            #  a1 <- B
     GET_OPA4(a2)                           #  a2 <- A+
     GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
diff --git a/runtime/interpreter/mterp/mips/op_check_cast.S b/runtime/interpreter/mterp/mips/op_check_cast.S
index 9a6cefa..3875ce6 100644
--- a/runtime/interpreter/mterp/mips/op_check_cast.S
+++ b/runtime/interpreter/mterp/mips/op_check_cast.S
@@ -1,7 +1,7 @@
     /*
      * Check to see if a cast from one class to another is allowed.
      */
-    # check-cast vAA, class                /* BBBB */
+    /* check-cast vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           #  a0 <- BBBB
     GET_OPA(a1)                            #  a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_double.S b/runtime/interpreter/mterp/mips/op_cmpg_double.S
index e7965a7..b2e7532 100644
--- a/runtime/interpreter/mterp/mips/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/mips/op_cmpg_double.S
@@ -1 +1 @@
-%include "mips/op_cmpl_double.S" { "naninst":"li rTEMP, 1" }
+%include "mips/op_cmpl_double.S" { "gt_bias":"1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_float.S b/runtime/interpreter/mterp/mips/op_cmpg_float.S
index 53519a6..76550b5 100644
--- a/runtime/interpreter/mterp/mips/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/mips/op_cmpg_float.S
@@ -1 +1 @@
-%include "mips/op_cmpl_float.S" { "naninst":"li rTEMP, 1" }
+%include "mips/op_cmpl_float.S" { "gt_bias":"1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_double.S b/runtime/interpreter/mterp/mips/op_cmpl_double.S
index db89242..369e5b3 100644
--- a/runtime/interpreter/mterp/mips/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/mips/op_cmpl_double.S
@@ -1,53 +1,51 @@
-%default { "naninst":"li rTEMP, -1" }
+%default { "gt_bias":"0" }
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.lt.d  ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .L${opcode}_finish
-    cmp.lt.d  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .L${opcode}_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .L${opcode}_finish
-    b         .L${opcode}_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .L${opcode}_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .L${opcode}_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .L${opcode}_finish
-    b         .L${opcode}_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if $gt_bias
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
-%break
-
-.L${opcode}_nan:
-    $naninst
-
-.L${opcode}_finish:
+1:
     GET_OPA(rOBJ)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_float.S b/runtime/interpreter/mterp/mips/op_cmpl_float.S
index b8c0961..1dd5506 100644
--- a/runtime/interpreter/mterp/mips/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/mips/op_cmpl_float.S
@@ -1,60 +1,49 @@
-%default { "naninst":"li rTEMP, -1" }
+%default { "gt_bias":"0" }
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.lt.s  ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .L${opcode}_finish
-    cmp.lt.s  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .L${opcode}_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .L${opcode}_finish
-    b         .L${opcode}_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .L${opcode}_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .L${opcode}_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .L${opcode}_finish
-    b         .L${opcode}_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if $gt_bias
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
-%break
-
-.L${opcode}_nan:
-    $naninst
-
-.L${opcode}_finish:
+1:
     GET_OPA(rOBJ)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/op_const.S b/runtime/interpreter/mterp/mips/op_const.S
index c505761..bd9f873 100644
--- a/runtime/interpreter/mterp/mips/op_const.S
+++ b/runtime/interpreter/mterp/mips/op_const.S
@@ -1,9 +1,8 @@
-    # const vAA,                           /* +BBBBbbbb */
+    /* const vAA, +BBBBbbbb */
     GET_OPA(a3)                            #  a3 <- AA
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a1, a1, 16
-    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
diff --git a/runtime/interpreter/mterp/mips/op_const_16.S b/runtime/interpreter/mterp/mips/op_const_16.S
index 5e47633..2ffb30f 100644
--- a/runtime/interpreter/mterp/mips/op_const_16.S
+++ b/runtime/interpreter/mterp/mips/op_const_16.S
@@ -1,4 +1,4 @@
-    # const/16 vAA,                        /* +BBBB */
+    /* const/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_const_4.S b/runtime/interpreter/mterp/mips/op_const_4.S
index 8b662f9..6866c78 100644
--- a/runtime/interpreter/mterp/mips/op_const_4.S
+++ b/runtime/interpreter/mterp/mips/op_const_4.S
@@ -1,4 +1,4 @@
-    # const/4 vA,                          /* +B */
+    /* const/4 vA, +B */
     sll       a1, rINST, 16                #  a1 <- Bxxx0000
     GET_OPA(a0)                            #  a0 <- A+
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_const_class.S b/runtime/interpreter/mterp/mips/op_const_class.S
index 7202b11..9adea44 100644
--- a/runtime/interpreter/mterp/mips/op_const_class.S
+++ b/runtime/interpreter/mterp/mips/op_const_class.S
@@ -1,4 +1,4 @@
-    # const/class vAA, Class               /* BBBB */
+    /* const-class vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_const_high16.S b/runtime/interpreter/mterp/mips/op_const_high16.S
index 36c1c35..5162402 100644
--- a/runtime/interpreter/mterp/mips/op_const_high16.S
+++ b/runtime/interpreter/mterp/mips/op_const_high16.S
@@ -1,4 +1,4 @@
-    # const/high16 vAA,                    /* +BBBB0000 */
+    /* const/high16 vAA, +BBBB0000 */
     FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sll       a0, a0, 16                   #  a0 <- BBBB0000
diff --git a/runtime/interpreter/mterp/mips/op_const_string.S b/runtime/interpreter/mterp/mips/op_const_string.S
index d8eeb46..006e114 100644
--- a/runtime/interpreter/mterp/mips/op_const_string.S
+++ b/runtime/interpreter/mterp/mips/op_const_string.S
@@ -1,4 +1,4 @@
-    # const/string vAA, String             /* BBBB */
+    /* const-string vAA, string@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
index d732ca1..54cec97 100644
--- a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
@@ -1,10 +1,9 @@
-    # const/string vAA, String          /* BBBBBBBB */
+    /* const-string/jumbo vAA, string@BBBBBBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- bbbb (low)
     FETCH(a2, 2)                        # a2 <- BBBB (high)
     GET_OPA(a1)                         # a1 <- AA
-    sll    a2, a2, 16
-    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)            # a0 <- BBBBbbbb
     addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
     move   a3, rSELF
     JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
diff --git a/runtime/interpreter/mterp/mips/op_const_wide.S b/runtime/interpreter/mterp/mips/op_const_wide.S
index 01d0f87..f8911e3 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide.S
@@ -1,14 +1,11 @@
-    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    /* const-wide vAA, +HHHHhhhhBBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
     FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
-    sll       a1, 16 #
-    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb (low word)
     FETCH(a3, 4)                           #  a3 <- HHHH (high)
     GET_OPA(t1)                            #  t1 <- AA
-    sll       a3, 16
-    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    INSERT_HIGH_HALF(a2, a3)               #  a2 <- HHHHhhhh (high word)
     FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a2, t1, t0)        #  vAA/vAA+1 <- a0/a2
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_16.S b/runtime/interpreter/mterp/mips/op_const_wide_16.S
index 583d9ef..2ca5ab9 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_16.S
@@ -1,8 +1,7 @@
-    # const-wide/16 vAA,                   /* +BBBB */
+    /* const-wide/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sra       a1, a0, 31                   #  a1 <- ssssssss
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_32.S b/runtime/interpreter/mterp/mips/op_const_wide_32.S
index 3eb4574..bf802ca 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_32.S
@@ -1,11 +1,9 @@
-    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    /* const-wide/32 vAA, +BBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a2, a2, 16
-    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)               #  a0 <- BBBBbbbb
     sra       a1, a0, 31                   #  a1 <- ssssssss
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_high16.S b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
index 88382c6..04b90fa 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_high16.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
@@ -1,9 +1,8 @@
-    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    /* const-wide/high16 vAA, +BBBB000000000000 */
     FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     li        a0, 0                        #  a0 <- 00000000
     sll       a1, 16                       #  a1 <- BBBB0000
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_double_to_int.S b/runtime/interpreter/mterp/mips/op_double_to_int.S
index b1792ec..3b44964 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_int.S
@@ -1,58 +1,39 @@
-%include "mips/unopNarrower.S" {"instr":"b d2i_doconv"}
-/*
- * Convert the double in a0/a1 to an int in a0.
- *
- * We have to clip values to int min/max per the specification.  The
- * expected common case is a "reasonable" value that converts directly
- * to modest integer.  The EABI convert function isn't doing this for us.
- */
-%break
+    /*
+     * double-to-int
+     *
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer; NaN and values below INT_MIN are fixed up first.
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)                #  fa0 <- vB (64-bit load)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-d2i_doconv:
+    li        t0, INT_MIN_AS_DOUBLE_HIGH   #  t0 <- high word of INT_MIN as a double
+    mtc1      zero, fa1                    #  low word of fa1 <- 0
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)        #  fa1 <- INT_MIN as a double
 #ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .L${opcode}_set_vreg_f
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.d  ft0, fa1, fa0                #  ft0 <- INT_MIN <= vB
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0                #  ft0 <- vB is not NaN
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
 #else
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
     c.ole.d   fcc0, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1t      .L${opcode}_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1t      .L${opcode}_set_vreg_f
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .L${opcode}_set_vreg_f
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.d    fcc0, fa0, fa0               #  fcc0 <- vB is not NaN
+    mtc1      zero, fa0                    #  low word of fa0 <- 0
+    MOVE_TO_FPU_HIGH(zero, fa0, fa0f)      #  fa0 <- 0.0
+    movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
 #endif
-
-    trunc.w.d  fv0, fa0
-    b         .L${opcode}_set_vreg_f
-
-.LDOUBLE_TO_INT_max:
-    .dword 0x41dfffffffc00000
-.LDOUBLE_TO_INT_min:
-    .dword 0xc1e0000000000000              #  minint, as a double (high word)
-.LDOUBLE_TO_INT_maxret:
-    .word 0x7fffffff
-.LDOUBLE_TO_INT_minret:
-    .word 0x80000000
+1:
+    trunc.w.d fa0, fa0                     #  fa0 <- fa0 truncated to int
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_double_to_long.S b/runtime/interpreter/mterp/mips/op_double_to_long.S
index 7f7a799..78d4a8f 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_long.S
@@ -1,56 +1,61 @@
-%include "mips/funopWide.S" {"instr":"b d2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
+    /*
+     * double-to-long
+     *
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)                #  fa0 <- vB (64-bit load)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH  #  t0 <- high word of LONG_MIN as a double
+    mtc1      zero, fa1                    #  low word of fa1 <- 0
+    mthc1     t0, fa1                      #  fa1 <- LONG_MIN as a double
+    cmp.le.d  ft0, fa1, fa0                #  ft0 <- LONG_MIN <= vB
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0                #  ft0 <- vB is not NaN
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
+1:
+    trunc.l.d fa0, fa0                     #  fa0 <- fa0 truncated to long
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA/vA+1 <- result
+#else
+    c.eq.d    fcc0, fa0, fa0               #  fcc0 <- vB is not NaN
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .L${opcode}_get_opcode #  NaN: store 0
+
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH  #  t0 <- high word of LONG_MIN as a double
+    mtc1      zero, fa1                    #  low word of fa1 <- 0
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)        #  fa1 <- LONG_MIN as a double
+    c.ole.d   fcc0, fa0, fa1               #  fcc0 <- vB <= LONG_MIN
+    li        rRESULT1, LONG_MIN_HIGH      #  rRESULT0/rRESULT1 now hold LONG_MIN
+    bc1t      fcc0, .L${opcode}_get_opcode #  yes: store LONG_MIN
+
+    neg.d     fa1, fa1                     #  fa1 <- -LONG_MIN as a double
+    c.ole.d   fcc0, fa1, fa0               #  fcc0 <- -LONG_MIN <= vB
+    nor       rRESULT0, rRESULT0, zero     #  rRESULT0 <- ~0
+    nor       rRESULT1, rRESULT1, zero     #  rRESULT1 <- ~LONG_MIN_HIGH
+    bc1t      fcc0, .L${opcode}_get_opcode #  yes: store ~LONG_MIN
+
+    JAL(__fixdfdi)                         #  otherwise convert via __fixdfdi
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .L${opcode}_set_vreg
+#endif
 %break
 
-d2l_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .L${opcode}_set_vreg
-#else
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .L${opcode}_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .L${opcode}_set_vreg
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .L${opcode}_set_vreg
+#ifndef MIPS32REVGE6
+.L${opcode}_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.L${opcode}_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
-    JAL(__fixdfdi)
-    b         .L${opcode}_set_vreg
-
-.LDOUBLE_TO_LONG_max:
-    .dword 0x43e0000000000000              #  maxlong, as a double (high word)
-.LDOUBLE_TO_LONG_min:
-    .dword 0xc3e0000000000000              #  minlong, as a double (high word)
-.LDOUBLE_TO_LONG_ret_max:
-    .dword 0x7fffffffffffffff
-.LDOUBLE_TO_LONG_ret_min:
-    .dword 0x8000000000000000
diff --git a/runtime/interpreter/mterp/mips/op_fill_array_data.S b/runtime/interpreter/mterp/mips/op_fill_array_data.S
index 8605746..c3cd371 100644
--- a/runtime/interpreter/mterp/mips/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/mips/op_fill_array_data.S
@@ -1,10 +1,9 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC()
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    FETCH(a1, 1)                           #  a1 <- bbbb (lo)
+    FETCH(a0, 2)                           #  a0 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
-    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    INSERT_HIGH_HALF(a1, a0)               #  a1 <- BBBBbbbb
     GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
     EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
     JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
diff --git a/runtime/interpreter/mterp/mips/op_filled_new_array.S b/runtime/interpreter/mterp/mips/op_filled_new_array.S
index 3f62fae..9511578 100644
--- a/runtime/interpreter/mterp/mips/op_filled_new_array.S
+++ b/runtime/interpreter/mterp/mips/op_filled_new_array.S
@@ -4,8 +4,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern $helper
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
diff --git a/runtime/interpreter/mterp/mips/op_float_to_int.S b/runtime/interpreter/mterp/mips/op_float_to_int.S
index 8292652..087e50f 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_int.S
@@ -1,50 +1,36 @@
-%include "mips/funop.S" {"instr":"b f2i_doconv"}
-%break
+    /*
+     * float-to-int
+     *
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer; NaN and values below INT_MIN are fixed up first.
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_VREG_F(fa0, a3)                    #  fa0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-/*
- * Not an entry point as it is used only once !!
- */
-f2i_doconv:
+    li        t0, INT_MIN_AS_FLOAT         #  t0 <- bit pattern of INT_MIN as a float
+    mtc1      t0, fa1                      #  fa1 <- INT_MIN as a float
 #ifdef MIPS32REVGE6
-    l.s       fa1, .LFLOAT_TO_INT_max
-    cmp.le.s  ft2, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    cmp.le.s  ft2, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .L${opcode}_set_vreg_f
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.s  ft0, fa1, fa0                #  ft0 <- INT_MIN <= vB
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0                #  ft0 <- vB is not NaN
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
 #else
-    l.s       fa1, .LFLOAT_TO_INT_max
     c.ole.s   fcc0, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1t      .L${opcode}_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    c.ole.s   fcc0, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1t      .L${opcode}_set_vreg_f
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .L${opcode}_set_vreg_f
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.s    fcc0, fa0, fa0               #  fcc0 <- vB is not NaN
+    mtc1      zero, fa0                    #  fa0 <- 0.0
+    movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
 #endif
-
-    trunc.w.s  fv0, fa0
-    b         .L${opcode}_set_vreg_f
-
-.LFLOAT_TO_INT_max:
-    .word 0x4f000000
-.LFLOAT_TO_INT_min:
-    .word 0xcf000000
-.LFLOAT_TO_INT_ret_max:
-    .word 0x7fffffff
-.LFLOAT_TO_INT_ret_min:
-    .word 0x80000000
+1:
+    trunc.w.s fa0, fa0                     #  fa0 <- fa0 truncated to int
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_float_to_long.S b/runtime/interpreter/mterp/mips/op_float_to_long.S
index a51384f..dc88a78 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_long.S
@@ -1,51 +1,58 @@
-%include "mips/funopWider.S" {"instr":"b f2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
-%break
+    /*
+     * float-to-long
+     *
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)                    #  fa0 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-f2l_doconv:
 #ifdef MIPS32REVGE6
-    l.s       fa1, .LLONG_TO_max
-    cmp.le.s  ft2, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    cmp.le.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .L${opcode}_set_vreg
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_FLOAT        #  t0 <- bit pattern of LONG_MIN as a float
+    mtc1      t0, fa1                      #  fa1 <- LONG_MIN as a float
+    cmp.le.s  ft0, fa1, fa0                #  ft0 <- LONG_MIN <= vB
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0                #  ft0 <- vB is not NaN
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
+1:
+    trunc.l.s fa0, fa0                     #  fa0 <- fa0 truncated to long
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA/vA+1 <- result
 #else
-    l.s       fa1, .LLONG_TO_max
-    c.ole.s   fcc0, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1t      .L${opcode}_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    c.ole.s   fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1t      .L${opcode}_set_vreg
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
+    c.eq.s    fcc0, fa0, fa0               #  fcc0 <- vB is not NaN
     li        rRESULT0, 0
     li        rRESULT1, 0
-    bc1t      .L${opcode}_set_vreg
-#endif
+    bc1f      fcc0, .L${opcode}_get_opcode #  NaN: store 0
+
+    li        t0, LONG_MIN_AS_FLOAT        #  t0 <- bit pattern of LONG_MIN as a float
+    mtc1      t0, fa1                      #  fa1 <- LONG_MIN as a float
+    c.ole.s   fcc0, fa0, fa1               #  fcc0 <- vB <= LONG_MIN
+    li        rRESULT1, LONG_MIN_HIGH      #  rRESULT0/rRESULT1 now hold LONG_MIN
+    bc1t      fcc0, .L${opcode}_get_opcode #  yes: store LONG_MIN
+
+    neg.s     fa1, fa1                     #  fa1 <- -LONG_MIN as a float
+    c.ole.s   fcc0, fa1, fa0               #  fcc0 <- -LONG_MIN <= vB
+    nor       rRESULT0, rRESULT0, zero     #  rRESULT0 <- ~0
+    nor       rRESULT1, rRESULT1, zero     #  rRESULT1 <- ~LONG_MIN_HIGH
+    bc1t      fcc0, .L${opcode}_get_opcode #  yes: store ~LONG_MIN
 
     JAL(__fixsfdi)
-
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
     b         .L${opcode}_set_vreg
+#endif
+%break
 
-.LLONG_TO_max:
-    .word 0x5f000000
-
-.LLONG_TO_min:
-    .word 0xdf000000
+#ifndef MIPS32REVGE6
+.L${opcode}_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.L${opcode}_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_goto_32.S b/runtime/interpreter/mterp/mips/op_goto_32.S
index 67f52e9..ef5bf6b 100644
--- a/runtime/interpreter/mterp/mips/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips/op_goto_32.S
@@ -8,8 +8,7 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(rINST, 1)                        #  rINST <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    INSERT_HIGH_HALF(rINST, a1)            #  rINST <- AAAAaaaa
     b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_iget.S b/runtime/interpreter/mterp/mips/op_iget.S
index 86d44fa..01f42d9 100644
--- a/runtime/interpreter/mterp/mips/op_iget.S
+++ b/runtime/interpreter/mterp/mips/op_iget.S
@@ -4,6 +4,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -15,11 +16,10 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if $is_object
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if $is_object
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
diff --git a/runtime/interpreter/mterp/mips/op_iget_object_quick.S b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
index 31d94b9..95c34d7 100644
--- a/runtime/interpreter/mterp/mips/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
@@ -9,7 +9,6 @@
     GET_OPA4(a2)                           #  a2<- A+
     PREFETCH_INST(2)                       #  load rINST
     bnez a3, MterpPossibleException        #  bail out
-    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       #  fp[A] <- v0
diff --git a/runtime/interpreter/mterp/mips/op_iget_quick.S b/runtime/interpreter/mterp/mips/op_iget_quick.S
index fbafa5b..46277d3 100644
--- a/runtime/interpreter/mterp/mips/op_iget_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_quick.S
@@ -1,6 +1,6 @@
 %default { "load":"lw" }
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide.S b/runtime/interpreter/mterp/mips/op_iget_wide.S
index 8fe3089..cf5019e 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide.S
@@ -3,6 +3,7 @@
      *
      * for: iget-wide
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field byte offset
     GET_OPB(a1)                            # a1 <- B
@@ -14,7 +15,6 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez a3, MterpException                # bail out
-    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a2, t0)        # fp[A] <- v0/v1
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
index 4d2f291..128be57 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
@@ -1,4 +1,4 @@
-    # iget-wide-quick vA, vB, offset       /* CCCC */
+    /* iget-wide-quick vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -9,5 +9,4 @@
     LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_instance_of.S b/runtime/interpreter/mterp/mips/op_instance_of.S
index d2679bd..706dcf3 100644
--- a/runtime/interpreter/mterp/mips/op_instance_of.S
+++ b/runtime/interpreter/mterp/mips/op_instance_of.S
@@ -4,7 +4,7 @@
      * Most common situation is a non-null object, being compared against
      * an already-resolved class.
      */
-    # instance-of vA, vB, class            /* CCCC */
+    /* instance-of vA, vB, class@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- CCCC
     GET_OPB(a1)                            # a1 <- B
diff --git a/runtime/interpreter/mterp/mips/op_int_to_byte.S b/runtime/interpreter/mterp/mips/op_int_to_byte.S
index 77314c62..9266aab 100644
--- a/runtime/interpreter/mterp/mips/op_int_to_byte.S
+++ b/runtime/interpreter/mterp/mips/op_int_to_byte.S
@@ -1 +1 @@
-%include "mips/unop.S" {"preinstr":"sll a0, a0, 24", "instr":"sra a0, a0, 24"}
+%include "mips/unop.S" {"instr":"SEB(a0, a0)"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_short.S b/runtime/interpreter/mterp/mips/op_int_to_short.S
index 5649c2a..8749cd8 100644
--- a/runtime/interpreter/mterp/mips/op_int_to_short.S
+++ b/runtime/interpreter/mterp/mips/op_int_to_short.S
@@ -1 +1 @@
-%include "mips/unop.S" {"preinstr":"sll a0, 16", "instr":"sra a0, 16"}
+%include "mips/unop.S" {"instr":"SEH(a0, a0)"}
diff --git a/runtime/interpreter/mterp/mips/op_iput.S b/runtime/interpreter/mterp/mips/op_iput.S
index 732a9a4..9133d60 100644
--- a/runtime/interpreter/mterp/mips/op_iput.S
+++ b/runtime/interpreter/mterp/mips/op_iput.S
@@ -4,7 +4,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern $handler
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_iput_object.S b/runtime/interpreter/mterp/mips/op_iput_object.S
index 6b856e7..cfa56ec 100644
--- a/runtime/interpreter/mterp/mips/op_iput_object.S
+++ b/runtime/interpreter/mterp/mips/op_iput_object.S
@@ -3,7 +3,7 @@
      *
      * for: iput-object, iput-object-volatile
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
diff --git a/runtime/interpreter/mterp/mips/op_iput_object_quick.S b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
index c3f1526..82044f5 100644
--- a/runtime/interpreter/mterp/mips/op_iput_object_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
@@ -1,5 +1,5 @@
     /* For: iput-object-quick */
-    # op vA, vB, offset                 /* CCCC */
+    /* op vA, vB, offset@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
diff --git a/runtime/interpreter/mterp/mips/op_iput_quick.S b/runtime/interpreter/mterp/mips/op_iput_quick.S
index 0829666..d9753b1 100644
--- a/runtime/interpreter/mterp/mips/op_iput_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_quick.S
@@ -1,6 +1,6 @@
 %default { "store":"sw" }
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -9,6 +9,7 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)                  #  t1 <- jump target used by JR below
     $store    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide.S b/runtime/interpreter/mterp/mips/op_iput_wide.S
index 6d23f8c..bc3d758 100644
--- a/runtime/interpreter/mterp/mips/op_iput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iput_wide.S
@@ -1,4 +1,4 @@
-    # iput-wide vA, vB, field              /* CCCC */
+    /* iput-wide vA, vB, field@CCCC */
     .extern artSet64InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
index 9fdb847..0eb228d 100644
--- a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
@@ -1,4 +1,4 @@
-    # iput-wide-quick vA, vB, offset       /* CCCC */
+    /* iput-wide-quick vA, vB, offset@CCCC */
     GET_OPA4(a0)                           #  a0 <- A(+)
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
@@ -9,6 +9,7 @@
     FETCH(a3, 1)                           #  a3 <- field byte offset
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      a2, a2, a3                   #  obj.field (64 bits, aligned) <- a0/a1
-    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    GET_OPCODE_TARGET(t0)                  #  t0 <- jump target used by JR below
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_long_to_double.S b/runtime/interpreter/mterp/mips/op_long_to_double.S
index b83aaf4..153f582 100644
--- a/runtime/interpreter/mterp/mips/op_long_to_double.S
+++ b/runtime/interpreter/mterp/mips/op_long_to_double.S
@@ -1 +1,20 @@
-%include "mips/funopWide.S" {"instr":"JAL(__floatdidf)", "ld_arg":"LOAD64(rARG0, rARG1, a3)"}
+    /*
+     * long-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)                #  fv0 <- vB (raw 64-bit long)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.l   fv0, fv0                     #  fv0 <- (double) vB
+#else
+    LOAD64(rARG0, rARG1, a3)               #  rARG0/rARG1 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdidf)                       #  result read from fv0/fv0f below, a2-a3 changed
+#endif
+
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- result
diff --git a/runtime/interpreter/mterp/mips/op_long_to_float.S b/runtime/interpreter/mterp/mips/op_long_to_float.S
index 27faba5..dd1ab81 100644
--- a/runtime/interpreter/mterp/mips/op_long_to_float.S
+++ b/runtime/interpreter/mterp/mips/op_long_to_float.S
@@ -1 +1,20 @@
-%include "mips/unopNarrower.S" {"instr":"JAL(__floatdisf)", "load":"LOAD64(rARG0, rARG1, a3)"}
+    /*
+     * long-to-float
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)                #  fv0 <- vB (raw 64-bit long)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.l   fv0, fv0                     #  fv0 <- (float) vB
+#else
+    LOAD64(rARG0, rARG1, a3)               #  rARG0/rARG1 <- vB
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdisf)                       #  result read from fv0 below
+#endif
+
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/op_move.S b/runtime/interpreter/mterp/mips/op_move.S
index 76588ba..547ea3a 100644
--- a/runtime/interpreter/mterp/mips/op_move.S
+++ b/runtime/interpreter/mterp/mips/op_move.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_16.S b/runtime/interpreter/mterp/mips/op_move_16.S
index f7de6c2..91b7399 100644
--- a/runtime/interpreter/mterp/mips/op_move_16.S
+++ b/runtime/interpreter/mterp/mips/op_move_16.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_exception.S b/runtime/interpreter/mterp/mips/op_move_exception.S
index f04a035..f1bece7 100644
--- a/runtime/interpreter/mterp/mips/op_move_exception.S
+++ b/runtime/interpreter/mterp/mips/op_move_exception.S
@@ -2,7 +2,8 @@
     GET_OPA(a2)                                 #  a2 <- AA
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
     FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
-    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)                       #  t0 <- jump target used by JR below
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
-    GOTO_OPCODE(t0)                             #  jump to next instruction
+    JR(t0)                                      #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_from16.S b/runtime/interpreter/mterp/mips/op_move_from16.S
index b8be741..90c25c9 100644
--- a/runtime/interpreter/mterp/mips/op_move_from16.S
+++ b/runtime/interpreter/mterp/mips/op_move_from16.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result.S b/runtime/interpreter/mterp/mips/op_move_result.S
index 315c68e..a4d5bfe 100644
--- a/runtime/interpreter/mterp/mips/op_move_result.S
+++ b/runtime/interpreter/mterp/mips/op_move_result.S
@@ -7,8 +7,7 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result_wide.S b/runtime/interpreter/mterp/mips/op_move_result_wide.S
index 940c1ff..1259218 100644
--- a/runtime/interpreter/mterp/mips/op_move_result_wide.S
+++ b/runtime/interpreter/mterp/mips/op_move_result_wide.S
@@ -3,6 +3,5 @@
     lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
     LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide.S b/runtime/interpreter/mterp/mips/op_move_wide.S
index dd224c3..01d0949 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_16.S b/runtime/interpreter/mterp/mips/op_move_wide_16.S
index d8761eb..587ba04 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide_16.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide_16.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AAAA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_from16.S b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
index 2103fa1..5003fbd 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide_from16.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_mul_long.S b/runtime/interpreter/mterp/mips/op_mul_long.S
index 803bbec..74b049a 100644
--- a/runtime/interpreter/mterp/mips/op_mul_long.S
+++ b/runtime/interpreter/mterp/mips/op_mul_long.S
@@ -39,5 +39,4 @@
 
 .L${opcode}_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a0, t0)        #  vAA/vAA+1 <- v0(low)/v1(high)
diff --git a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
index 6950b71..683b055 100644
--- a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
@@ -26,6 +26,4 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    # vAA <- v0 (low)
-    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, rOBJ, t1)      #  vA/vA+1 <- v0(low)/v1(high)
diff --git a/runtime/interpreter/mterp/mips/op_new_instance.S b/runtime/interpreter/mterp/mips/op_new_instance.S
index 51a09b2..3c9e83f 100644
--- a/runtime/interpreter/mterp/mips/op_new_instance.S
+++ b/runtime/interpreter/mterp/mips/op_new_instance.S
@@ -1,7 +1,7 @@
     /*
      * Create a new instance of a class.
      */
-    # new-instance vAA, class              /* BBBB */
+    /* new-instance vAA, class@BBBB */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rSELF
diff --git a/runtime/interpreter/mterp/mips/op_packed_switch.S b/runtime/interpreter/mterp/mips/op_packed_switch.S
index ffa4f47..0a1ff98 100644
--- a/runtime/interpreter/mterp/mips/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips/op_packed_switch.S
@@ -12,8 +12,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL($func)                             #  a0 <- code-unit branch offset
diff --git a/runtime/interpreter/mterp/mips/op_return.S b/runtime/interpreter/mterp/mips/op_return.S
index 894ae18..44b9395 100644
--- a/runtime/interpreter/mterp/mips/op_return.S
+++ b/runtime/interpreter/mterp/mips/op_return.S
@@ -8,7 +8,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_return_void.S b/runtime/interpreter/mterp/mips/op_return_void.S
index 35c1326..1f616ea 100644
--- a/runtime/interpreter/mterp/mips/op_return_void.S
+++ b/runtime/interpreter/mterp/mips/op_return_void.S
@@ -2,7 +2,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S b/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
index 56968b5..e670c28 100644
--- a/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
     lw     ra, THREAD_FLAGS_OFFSET(rSELF)
     move   a0, rSELF
-    and    ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and    ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz   ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_return_wide.S b/runtime/interpreter/mterp/mips/op_return_wide.S
index 91d62bf..f0f679d 100644
--- a/runtime/interpreter/mterp/mips/op_return_wide.S
+++ b/runtime/interpreter/mterp/mips/op_return_wide.S
@@ -6,7 +6,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_sget.S b/runtime/interpreter/mterp/mips/op_sget.S
index 3efcfbb..64ece1e 100644
--- a/runtime/interpreter/mterp/mips/op_sget.S
+++ b/runtime/interpreter/mterp/mips/op_sget.S
@@ -4,7 +4,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern $helper
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -15,11 +15,10 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if $is_object
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if $is_object
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
diff --git a/runtime/interpreter/mterp/mips/op_sget_wide.S b/runtime/interpreter/mterp/mips/op_sget_wide.S
index 7aee386..c729250 100644
--- a/runtime/interpreter/mterp/mips/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sget_wide.S
@@ -1,7 +1,7 @@
     /*
      * 64-bit SGET handler.
      */
-    # sget-wide vAA, field                 /* BBBB */
+    /* sget-wide vAA, field@BBBB */
     .extern artGet64StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -12,6 +12,5 @@
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
     FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
-    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a1, t0)        # vAA/vAA+1 <- v0/v1
diff --git a/runtime/interpreter/mterp/mips/op_shl_long.S b/runtime/interpreter/mterp/mips/op_shl_long.S
index 0121669..cc08112 100644
--- a/runtime/interpreter/mterp/mips/op_shl_long.S
+++ b/runtime/interpreter/mterp/mips/op_shl_long.S
@@ -24,7 +24,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
diff --git a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
index 8ce6058..93c5783 100644
--- a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
-    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -20,8 +20,8 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
-    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_shr_long.S b/runtime/interpreter/mterp/mips/op_shr_long.S
index 4c42758..ea032fe 100644
--- a/runtime/interpreter/mterp/mips/op_shr_long.S
+++ b/runtime/interpreter/mterp/mips/op_shr_long.S
@@ -23,7 +23,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v0
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
diff --git a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
index 3adc085..c805ea4 100644
--- a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
 
@@ -19,9 +19,9 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
     sra     a3, a1, 31                     #  a3<- sign(ah)
-    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_sput.S b/runtime/interpreter/mterp/mips/op_sput.S
index ee313b9..7034a0e 100644
--- a/runtime/interpreter/mterp/mips/op_sput.S
+++ b/runtime/interpreter/mterp/mips/op_sput.S
@@ -4,7 +4,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_sput_wide.S b/runtime/interpreter/mterp/mips/op_sput_wide.S
index 1e11466..3b347fc 100644
--- a/runtime/interpreter/mterp/mips/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sput_wide.S
@@ -1,7 +1,7 @@
     /*
      * 64-bit SPUT handler.
      */
-    # sput-wide vAA, field                 /* BBBB */
+    /* sput-wide vAA, field@BBBB */
     .extern artSet64IndirectStaticFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
index ccf1f7e..9e93f34 100644
--- a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -20,8 +20,8 @@
     sll       a1, 1
     sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
     or        v0, a1                       #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
-    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/unop.S b/runtime/interpreter/mterp/mips/unop.S
index 52a8f0a..bc99263 100644
--- a/runtime/interpreter/mterp/mips/unop.S
+++ b/runtime/interpreter/mterp/mips/unop.S
@@ -1,11 +1,11 @@
 %default {"preinstr":"", "result0":"a0"}
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -15,5 +15,4 @@
     $preinstr                              #  optional op
     $instr                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO($result0, t0, t1)        #  vA <- result0
diff --git a/runtime/interpreter/mterp/mips/unopNarrower.S b/runtime/interpreter/mterp/mips/unopNarrower.S
index 9c38bad..0196e27 100644
--- a/runtime/interpreter/mterp/mips/unopNarrower.S
+++ b/runtime/interpreter/mterp/mips/unopNarrower.S
@@ -1,24 +1,16 @@
 %default {"load":"LOAD64_F(fa0, fa0f, a3)"}
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * Generic 64bit-to-32bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * For: double-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     $load
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/unopWide.S b/runtime/interpreter/mterp/mips/unopWide.S
index fd25dff..135d9fa 100644
--- a/runtime/interpreter/mterp/mips/unopWide.S
+++ b/runtime/interpreter/mterp/mips/unopWide.S
@@ -1,7 +1,7 @@
 %default {"preinstr":"", "result0":"a0", "result1":"a1"}
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -10,11 +10,9 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $preinstr                              #  optional op
     $instr                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- result0/result1
diff --git a/runtime/interpreter/mterp/mips/unopWider.S b/runtime/interpreter/mterp/mips/unopWider.S
index 1c18837..ca888ad 100644
--- a/runtime/interpreter/mterp/mips/unopWider.S
+++ b/runtime/interpreter/mterp/mips/unopWider.S
@@ -1,8 +1,7 @@
 %default {"preinstr":"", "result0":"a0", "result1":"a1"}
     /*
      * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * that specifies an instruction that performs "result0/result1 = op a0".
      *
      * For: int-to-long
      */
@@ -14,6 +13,4 @@
     $preinstr                              #  optional op
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vA/vA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 10-11 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- result0/result1
diff --git a/runtime/interpreter/mterp/mips64/footer.S b/runtime/interpreter/mterp/mips64/footer.S
index 4063162..64772c8 100644
--- a/runtime/interpreter/mterp/mips64/footer.S
+++ b/runtime/interpreter/mterp/mips64/footer.S
@@ -108,7 +108,7 @@
     REFRESH_IBASE
     daddu   a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnezc   ra, .L_suspend_request_pending
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
@@ -225,7 +225,7 @@
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     sd      a0, 0(a2)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, check2
     jal     MterpSuspendCheck                       # (self)
 check2:
diff --git a/runtime/interpreter/mterp/mips64/op_return.S b/runtime/interpreter/mterp/mips64/op_return.S
index b10c03f..edd795f 100644
--- a/runtime/interpreter/mterp/mips64/op_return.S
+++ b/runtime/interpreter/mterp/mips64/op_return.S
@@ -10,7 +10,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_return_void.S b/runtime/interpreter/mterp/mips64/op_return_void.S
index 05253ae..f6eee91 100644
--- a/runtime/interpreter/mterp/mips64/op_return_void.S
+++ b/runtime/interpreter/mterp/mips64/op_return_void.S
@@ -3,7 +3,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
index f67e811..4e9b640 100644
--- a/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
@@ -1,7 +1,7 @@
     .extern MterpSuspendCheck
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_return_wide.S b/runtime/interpreter/mterp/mips64/op_return_wide.S
index 544e027..91ca1fa 100644
--- a/runtime/interpreter/mterp/mips64/op_return_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_return_wide.S
@@ -8,7 +8,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 46d5af1..c8c1563 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -291,7 +291,7 @@
                                    ShadowFrame* shadow_frame,
                                    Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::String> s = ResolveString(self, *shadow_frame, index);
+  ObjPtr<mirror::String> s = ResolveString(self, *shadow_frame, dex::StringIndex(index));
   if (UNLIKELY(s == nullptr)) {
     return true;
   }
@@ -304,7 +304,11 @@
                                   ShadowFrame* shadow_frame,
                                   Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
+  mirror::Class* c = ResolveVerifyAndClinit(dex::TypeIndex(index),
+                                            shadow_frame->GetMethod(),
+                                            self,
+                                            false,
+                                            false);
   if (UNLIKELY(c == nullptr)) {
     return true;
   }
@@ -317,7 +321,11 @@
                                  art::ArtMethod* method,
                                  Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(index, method, self, false, false);
+  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(index),
+                                                   method,
+                                                   self,
+                                                   false,
+                                                   false);
   if (UNLIKELY(c == nullptr)) {
     return true;
   }
@@ -335,7 +343,11 @@
                                   art::ArtMethod* method,
                                   Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(index, method, self, false, false);
+  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(index),
+                                                   method,
+                                                   self,
+                                                   false,
+                                                   false);
   if (UNLIKELY(c == nullptr)) {
     return false;  // Caller will check for pending exception.  Return value unimportant.
   }
@@ -353,7 +365,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   mirror::Object* obj = nullptr;
-  mirror::Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+  mirror::Class* c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegB_21c()),
                                             shadow_frame->GetMethod(),
                                             self,
                                             false,
@@ -363,9 +375,10 @@
       gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
       obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
     } else {
-      obj = AllocObjectFromCode<false, true>(
-        inst->VRegB_21c(), shadow_frame->GetMethod(), self,
-        Runtime::Current()->GetHeap()->GetCurrentAllocator());
+      obj = AllocObjectFromCode<false, true>(dex::TypeIndex(inst->VRegB_21c()),
+                                             shadow_frame->GetMethod(),
+                                             self,
+                                             Runtime::Current()->GetHeap()->GetCurrentAllocator());
     }
   }
   if (UNLIKELY(obj == nullptr)) {
@@ -446,7 +459,7 @@
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
   mirror::Object* obj = AllocArrayFromCode<false, true>(
-      inst->VRegC_22c(), length, shadow_frame->GetMethod(), self,
+      dex::TypeIndex(inst->VRegC_22c()), length, shadow_frame->GetMethod(), self,
       Runtime::Current()->GetHeap()->GetCurrentAllocator());
   if (UNLIKELY(obj == nullptr)) {
       return false;
@@ -564,6 +577,8 @@
     LOG(INFO) << "Checkpoint fallback: " << inst->Opcode(inst_data);
   } else if (flags & kSuspendRequest) {
     LOG(INFO) << "Suspend fallback: " << inst->Opcode(inst_data);
+  } else if (flags & kEmptyCheckpointRequest) {
+    LOG(INFO) << "Empty checkpoint fallback: " << inst->Opcode(inst_data);
   }
 }
 
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 78a90af..4d540d7 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -619,7 +619,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
@@ -639,7 +639,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
@@ -658,7 +658,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
@@ -680,7 +680,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
@@ -3149,7 +3149,7 @@
 /* File: arm/op_return_void_no_barrier.S */
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
@@ -11989,7 +11989,7 @@
     REFRESH_IBASE
     add     r2, rINST, rINST            @ r2<- byte offset
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index dafcc3e..34d99a8 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -300,6 +300,44 @@
   ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 .endm
 
+/*
+ * Save two registers to the stack.
+ */
+.macro SAVE_TWO_REGS reg1, reg2, offset
+    stp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_rel_offset \reg1, (\offset)
+    .cfi_rel_offset \reg2, (\offset) + 8
+.endm
+
+/*
+ * Restore two registers from the stack.
+ */
+.macro RESTORE_TWO_REGS reg1, reg2, offset
+    ldp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+/*
+ * Increase frame size and save two registers to the bottom of the stack.
+ */
+.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
+    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+/*
+ * Restore two registers from the bottom of the stack and decrease frame size.
+ */
+.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
+    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
+
 /* File: arm64/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -334,11 +372,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
+    SAVE_TWO_REGS_INCREASE_FRAME xPROFILE, x27, 80
+    SAVE_TWO_REGS                xIBASE, xREFS, 16
+    SAVE_TWO_REGS                xSELF, xINST, 32
+    SAVE_TWO_REGS                xPC, xFP, 48
+    SAVE_TWO_REGS                fp, lr, 64
     add     fp, sp, #64
 
     /* Remember the return register */
@@ -616,7 +654,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_void_check
 .Lop_return_void_return:
     mov     x0, #0
@@ -639,7 +677,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_check
 .Lop_return_return:
     lsr     w2, wINST, #8               // r2<- AA
@@ -662,7 +700,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_wide_check
 .Lop_return_wide_return:
     lsr     w2, wINST, #8               // w2<- AA
@@ -687,7 +725,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_object_check
 .Lop_return_object_return:
     lsr     w2, wINST, #8               // r2<- AA
@@ -3033,7 +3071,7 @@
 /* File: arm64/op_return_void_no_barrier.S */
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_void_no_barrier_check
 .Lop_return_void_no_barrier_return:
     mov     x0, #0
@@ -7082,7 +7120,7 @@
     add     w2, wINST, wINST            // w2<- byte offset
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L_suspend_request_pending
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -7156,7 +7194,7 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    check1
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -7211,7 +7249,7 @@
     ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
     str     x0, [x2]
     mov     x0, xSELF
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.eq    check2
     bl      MterpSuspendCheck                       // (self)
 check2:
@@ -7226,12 +7264,15 @@
  */
     cmp     wPROFILE, #0
     bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    .cfi_remember_state
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
+    .cfi_restore_state                              // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 80                          // workaround for clang bug: 31975598
 
 MterpProfileActive:
     mov     xINST, x0                               // stash return value
@@ -7242,11 +7283,11 @@
     strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
     bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
     mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index c1ba794..e154e6c 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -160,6 +160,58 @@
 #define fcc1   $fcc1
 #endif
 
+#ifdef MIPS32REVGE2
+#define SEB(rd, rt) \
+    seb       rd, rt
+#define SEH(rd, rt) \
+    seh       rd, rt
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    ins       rd_lo, rt_hi, 16, 16
+#else
+#define SEB(rd, rt) \
+    sll       rd, rt, 24; \
+    sra       rd, rd, 24
+#define SEH(rd, rt) \
+    sll       rd, rt, 16; \
+    sra       rd, rd, 16
+/* Clobbers rt_hi on pre-R2. */
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    sll       rt_hi, rt_hi, 16; \
+    or        rd_lo, rt_hi
+#endif
+
+#ifdef FPU64
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mthc1     r, flo
+#else
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mtc1      r, fhi
+#endif
+
+#ifdef MIPS32REVGE6
+#define JR(rt) \
+    jic       rt, 0
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    lsa       rd, rs, rt, sa; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#else
+#define JR(rt) \
+    jalr      zero, rt
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    .set      push; \
+    .set      noat; \
+    sll       AT, rs, sa; \
+    addu      rd, AT, rt; \
+    .set      pop; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#endif
+
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
  * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
@@ -193,12 +245,12 @@
     sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
 
 #define EXPORT_DEX_PC(tmp) \
-    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
-    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
-    addu tmp, CODEITEM_INSNS_OFFSET \
-    subu tmp, rPC, tmp \
-    sra  tmp, tmp, 1 \
-    sw   tmp, OFF_FP_DEX_PC(rFP)
+    lw        tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu      tmp, CODEITEM_INSNS_OFFSET; \
+    subu      tmp, rPC, tmp; \
+    sra       tmp, tmp, 1; \
+    sw        tmp, OFF_FP_DEX_PC(rFP)
 
 /*
  * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
@@ -213,18 +265,11 @@
  * exception catch may miss.  (This also implies that it must come after
  * EXPORT_PC().)
  */
-#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+#define FETCH_ADVANCE_INST(_count) \
+    lhu       rINST, ((_count)*2)(rPC); \
     addu      rPC, rPC, ((_count) * 2)
 
 /*
- * The operation performed here is similar to FETCH_ADVANCE_INST, except the
- * src and dest registers are parameterized (not hard-wired to rPC and rINST).
- */
-#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
-    lhu       _dreg, ((_count)*2)(_sreg) ;            \
-    addu      _sreg, _sreg, (_count)*2
-
-/*
  * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
  * rINST ahead of possible exception point.  Be sure to manually advance rPC
  * later.
@@ -239,7 +284,8 @@
  * rPC to point to the next instruction.  "rd" must specify the distance
  * in bytes, *not* 16-bit code units, and may be a signed value.
  */
-#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+#define FETCH_ADVANCE_INST_RB(rd) \
+    addu      rPC, rPC, rd; \
     lhu       rINST, (rPC)
 
 /*
@@ -264,38 +310,75 @@
 #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
 
 /*
- * Put the prefetched instruction's opcode field into the specified register.
+ * Transform opcode into branch target address.
  */
-#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+#define GET_OPCODE_TARGET(rd) \
+    sll       rd, rd, 7; \
+    addu      rd, rIBASE, rd
 
 /*
  * Begin executing the opcode in rd.
  */
-#define GOTO_OPCODE(rd) sll rd, rd, 7; \
-    addu      rd, rIBASE, rd; \
-    jalr      zero, rd
-
-#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, 7; \
-    addu      rd, _base, rd; \
-    jalr      zero, rd
+#define GOTO_OPCODE(rd) \
+    GET_OPCODE_TARGET(rd); \
+    JR(rd)
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
  */
 #define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
 
-#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
-    .set noat; l.s rd, (AT); .set at
+#define GET_VREG_F(rd, rix) \
+    .set noat; \
+    EAS2(AT, rFP, rix); \
+    l.s       rd, (AT); \
+    .set at
 
-#define SET_VREG(rd, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG(rd, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
     sw        zero, 0(t8)
+#endif
 
-#define SET_VREG64(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        rd, 0(t8)
+#else
+#define SET_VREG_OBJECT(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#else
+#define SET_VREG64(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rlo, 0(t8); \
@@ -304,9 +387,39 @@
     .set at; \
     sw        zero, 0(t8); \
     sw        zero, 4(t8)
+#endif
 
-#ifdef FPU64
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_F(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG_F(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#elif defined(FPU64)
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rREFS, AT; \
     sw        zero, 0(t8); \
@@ -317,7 +430,8 @@
     .set at; \
     s.s       rlo, 0(t8)
 #else
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rlo, 0(t8); \
@@ -328,18 +442,21 @@
     sw        zero, 4(t8)
 #endif
 
-#define SET_VREG_OBJECT(rd, rix) .set noat; \
-    sll       AT, rix, 2; \
-    addu      t8, rFP, AT; \
-    sw        rd, 0(t8); \
-    addu      t8, rREFS, AT; \
-    .set at; \
-    sw        rd, 0(t8)
-
 /* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
-    sll       dst, dst, 7; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -349,11 +466,51 @@
     jalr      zero, dst; \
     sw        zero, 0(t8); \
     .set reorder
+#endif
+
+/* Combination of the SET_VREG_OBJECT and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#endif
 
 /* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
-    sll       dst, dst, 7; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#else
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -365,14 +522,82 @@
     jalr      zero, dst; \
     sw        zero, 4(t8); \
     .set reorder
+#endif
 
-#define SET_VREG_F(rd, rix) .set noat; \
+/* Combination of the SET_VREG_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
-    sw        zero, 0(t8)
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#endif
+
+/* Combination of the SET_VREG64_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#elif defined(FPU64)
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    jalr      zero, dst; \
+    s.s       rlo, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#endif
 
 #define GET_OPA(rd) srl rd, rINST, 8
 #ifdef MIPS32REVGE2
@@ -383,60 +608,60 @@
 #define GET_OPB(rd) srl rd, rINST, 12
 
 /*
- * Form an Effective Address rd = rbase + roff<<n;
- * Uses reg AT
+ * Form an Effective Address rd = rbase + roff<<shift;
+ * Uses reg AT on pre-R6.
  */
-#define EASN(rd, rbase, roff, rshift) .set noat; \
-    sll       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
-    .set at
+#define EASN(rd, rbase, roff, shift) LSA(rd, roff, rbase, shift)
 
 #define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
 #define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
 #define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
 #define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
 
-/*
- * Form an Effective Shift Right rd = rbase + roff>>n;
- * Uses reg AT
- */
-#define ESRN(rd, rbase, roff, rshift) .set noat; \
-    srl       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
+#define LOAD_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    lw        rd, 0(AT); \
     .set at
 
-#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; lw rd, 0(AT); .set at
-
-#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; sw rd, 0(AT); .set at
+#define STORE_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    sw        rd, 0(AT); \
+    .set at
 
 #define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
 #define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
 
-#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+#define STORE64_off(rlo, rhi, rbase, off) \
+    sw        rlo, off(rbase); \
     sw        rhi, (off+4)(rbase)
-#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+#define LOAD64_off(rlo, rhi, rbase, off) \
+    lw        rlo, off(rbase); \
     lw        rhi, (off+4)(rbase)
 
 #define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
 #define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
 
 #ifdef FPU64
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     .set noat; \
     mfhc1     AT, rlo; \
     sw        AT, (off+4)(rbase); \
     .set at
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     .set noat; \
     lw        AT, (off+4)(rbase); \
     mthc1     AT, rlo; \
     .set at
 #else
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     s.s       rhi, (off+4)(rbase)
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     l.s       rhi, (off+4)(rbase)
 #endif
 
@@ -498,6 +723,14 @@
 #define REFRESH_IBASE() \
     lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN                 0x80000000
+#define INT_MIN_AS_FLOAT        0xCF000000
+#define INT_MIN_AS_DOUBLE_HIGH  0xC1E00000
+#define LONG_MIN_HIGH           0x80000000
+#define LONG_MIN_AS_FLOAT       0xDF000000
+#define LONG_MIN_AS_DOUBLE_HIGH 0xC3E00000
+
 /* File: mips/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -599,11 +832,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -617,11 +849,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -635,11 +866,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -652,9 +882,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -667,9 +896,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -682,9 +910,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AAAA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -699,11 +926,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -719,11 +945,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -739,11 +964,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -758,11 +982,10 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -773,9 +996,8 @@
     lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
     LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -790,11 +1012,10 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -805,10 +1026,11 @@
     GET_OPA(a2)                                 #  a2 <- AA
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
     FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
-    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
-    GOTO_OPCODE(t0)                             #  jump to next instruction
+    JR(t0)                                      #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -818,7 +1040,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -840,7 +1062,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -861,7 +1083,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -885,7 +1107,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -899,7 +1121,7 @@
     .balign 128
 .L_op_const_4: /* 0x12 */
 /* File: mips/op_const_4.S */
-    # const/4 vA,                          /* +B */
+    /* const/4 vA, +B */
     sll       a1, rINST, 16                #  a1 <- Bxxx0000
     GET_OPA(a0)                            #  a0 <- A+
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
@@ -912,7 +1134,7 @@
     .balign 128
 .L_op_const_16: /* 0x13 */
 /* File: mips/op_const_16.S */
-    # const/16 vAA,                        /* +BBBB */
+    /* const/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -923,13 +1145,12 @@
     .balign 128
 .L_op_const: /* 0x14 */
 /* File: mips/op_const.S */
-    # const vAA,                           /* +BBBBbbbb */
+    /* const vAA, +BBBBbbbb */
     GET_OPA(a3)                            #  a3 <- AA
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a1, a1, 16
-    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
 
@@ -937,7 +1158,7 @@
     .balign 128
 .L_op_const_high16: /* 0x15 */
 /* File: mips/op_const_high16.S */
-    # const/high16 vAA,                    /* +BBBB0000 */
+    /* const/high16 vAA, +BBBB0000 */
     FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sll       a0, a0, 16                   #  a0 <- BBBB0000
@@ -949,69 +1170,62 @@
     .balign 128
 .L_op_const_wide_16: /* 0x16 */
 /* File: mips/op_const_wide_16.S */
-    # const-wide/16 vAA,                   /* +BBBB */
+    /* const-wide/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sra       a1, a0, 31                   #  a1 <- ssssssss
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide_32: /* 0x17 */
 /* File: mips/op_const_wide_32.S */
-    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    /* const-wide/32 vAA, +BBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a2, a2, 16
-    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)               #  a0 <- BBBBbbbb
     sra       a1, a0, 31                   #  a1 <- ssssssss
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide: /* 0x18 */
 /* File: mips/op_const_wide.S */
-    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    /* const-wide vAA, +HHHHhhhhBBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
     FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
-    sll       a1, 16 #
-    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb (low word)
     FETCH(a3, 4)                           #  a3 <- HHHH (high)
     GET_OPA(t1)                            #  t1 <- AA
-    sll       a3, 16
-    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    INSERT_HIGH_HALF(a2, a3)               #  a2 <- HHHHhhhh (high word)
     FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a2, t1, t0)        #  vAA/vAA+1 <- a0/a2
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide_high16: /* 0x19 */
 /* File: mips/op_const_wide_high16.S */
-    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    /* const-wide/high16 vAA, +BBBB000000000000 */
     FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     li        a0, 0                        #  a0 <- 00000000
     sll       a1, 16                       #  a1 <- BBBB0000
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_string: /* 0x1a */
 /* File: mips/op_const_string.S */
-    # const/string vAA, String             /* BBBB */
+    /* const-string vAA, string@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
@@ -1028,13 +1242,12 @@
     .balign 128
 .L_op_const_string_jumbo: /* 0x1b */
 /* File: mips/op_const_string_jumbo.S */
-    # const/string vAA, String          /* BBBBBBBB */
+    /* const-string/jumbo vAA, string@BBBBBBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- bbbb (low)
     FETCH(a2, 2)                        # a2 <- BBBB (high)
     GET_OPA(a1)                         # a1 <- AA
-    sll    a2, a2, 16
-    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)            # a0 <- BBBBbbbb
     addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
     move   a3, rSELF
     JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
@@ -1048,7 +1261,7 @@
     .balign 128
 .L_op_const_class: /* 0x1c */
 /* File: mips/op_const_class.S */
-    # const/class vAA, Class               /* BBBB */
+    /* const-class vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
@@ -1108,7 +1321,7 @@
     /*
      * Check to see if a cast from one class to another is allowed.
      */
-    # check-cast vAA, class                /* BBBB */
+    /* check-cast vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           #  a0 <- BBBB
     GET_OPA(a1)                            #  a1 <- AA
@@ -1132,7 +1345,7 @@
      * Most common situation is a non-null object, being compared against
      * an already-resolved class.
      */
-    # instance-of vA, vB, class            /* CCCC */
+    /* instance-of vA, vB, class@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -1155,6 +1368,7 @@
     /*
      * Return the length of an array.
      */
+    /* array-length vA, vB */
     GET_OPB(a1)                            #  a1 <- B
     GET_OPA4(a2)                           #  a2 <- A+
     GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
@@ -1172,7 +1386,7 @@
     /*
      * Create a new instance of a class.
      */
-    # new-instance vAA, class              /* BBBB */
+    /* new-instance vAA, class@BBBB */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rSELF
@@ -1215,8 +1429,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern MterpFilledNewArray
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
@@ -1238,8 +1452,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern MterpFilledNewArrayRange
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
@@ -1258,11 +1472,10 @@
 /* File: mips/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC()
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    FETCH(a1, 1)                           #  a1 <- bbbb (lo)
+    FETCH(a0, 2)                           #  a0 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
-    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    INSERT_HIGH_HALF(a1, a0)               #  a1 <- BBBBbbbb
     GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
     EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
     JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
@@ -1330,10 +1543,9 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(rINST, 1)                        #  rINST <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    INSERT_HIGH_HALF(rINST, a1)            #  rINST <- AAAAaaaa
     b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
@@ -1353,8 +1565,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
@@ -1379,8 +1590,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
@@ -1393,55 +1603,54 @@
 .L_op_cmpl_float: /* 0x2d */
 /* File: mips/op_cmpl_float.S */
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into vAA; a NaN operand yields -1 (cmpl-float) or 1 (cmpg-float).
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.lt.s  ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpl_float_finish
-    cmp.lt.s  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpl_float_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpl_float_finish
-    b         .Lop_cmpl_float_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpl_float_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpl_float_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpl_float_finish
-    b         .Lop_cmpl_float_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 0
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 /* ------------------------------ */
     .balign 128
@@ -1449,55 +1658,54 @@
 /* File: mips/op_cmpg_float.S */
 /* File: mips/op_cmpl_float.S */
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into vAA; a NaN operand yields -1 (cmpl-float) or 1 (cmpg-float).
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.lt.s  ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpg_float_finish
-    cmp.lt.s  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpg_float_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpg_float_finish
-    b         .Lop_cmpg_float_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpg_float_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpg_float_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpg_float_finish
-    b         .Lop_cmpg_float_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 1
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 
 /* ------------------------------ */
@@ -1506,47 +1714,55 @@
 /* File: mips/op_cmpl_double.S */
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into vAA; a NaN operand yields -1 (cmpl-double) or 1 (cmpg-double).
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.lt.d  ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpl_double_finish
-    cmp.lt.d  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpl_double_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpl_double_finish
-    b         .Lop_cmpl_double_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpl_double_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpl_double_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpl_double_finish
-    b         .Lop_cmpl_double_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 0
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 /* ------------------------------ */
     .balign 128
@@ -1555,47 +1771,55 @@
 /* File: mips/op_cmpl_double.S */
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into vAA; a NaN operand yields -1 (cmpl-double) or 1 (cmpg-double).
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.lt.d  ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpg_double_finish
-    cmp.lt.d  ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpg_double_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpg_double_finish
-    b         .Lop_cmpg_double_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpg_double_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpg_double_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpg_double_finish
-    b         .Lop_cmpg_double_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 1
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 
 /* ------------------------------ */
@@ -2015,11 +2239,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 2
     EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2074,10 +2294,9 @@
     lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
     PREFETCH_INST(2)                       #  load rINST
     bnez a1, MterpException
-    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, rOBJ, t0)     #  vAA <- v0
 
 /* ------------------------------ */
     .balign 128
@@ -2104,11 +2323,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2142,11 +2357,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2180,11 +2391,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2218,11 +2425,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2253,17 +2456,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 2
     EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sw a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -2271,8 +2471,6 @@
 /* File: mips/op_aput_wide.S */
     /*
      * Array put, 64 bits.  vBB[vCC] <- vAA.
-     *
-     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
      */
     /* aput-wide vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
@@ -2292,8 +2490,9 @@
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -2337,17 +2536,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sb a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2373,17 +2569,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sb a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2409,17 +2602,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sh a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2445,17 +2635,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sh a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2467,6 +2654,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2478,14 +2666,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 /* ------------------------------ */
     .balign 128
@@ -2496,6 +2683,7 @@
      *
      * for: iget-wide
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field byte offset
     GET_OPB(a1)                            # a1 <- B
@@ -2507,10 +2695,9 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez a3, MterpException                # bail out
-    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a2, t0)        # fp[A] <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -2522,6 +2709,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2533,14 +2721,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 1
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 1
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2553,6 +2740,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2564,14 +2752,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2584,6 +2771,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2595,14 +2783,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2615,6 +2802,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2626,14 +2814,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2646,6 +2833,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2657,14 +2845,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2676,7 +2863,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet32InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2696,7 +2883,7 @@
     .balign 128
 .L_op_iput_wide: /* 0x5a */
 /* File: mips/op_iput_wide.S */
-    # iput-wide vA, vB, field              /* CCCC */
+    /* iput-wide vA, vB, field@CCCC */
     .extern artSet64InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2721,7 +2908,7 @@
      *
      * for: iput-object, iput-object-volatile
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
@@ -2743,7 +2930,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet8InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2770,7 +2957,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet8InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2797,7 +2984,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet16InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2824,7 +3011,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet16InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2850,7 +3037,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGet32StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2861,14 +3048,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 /* ------------------------------ */
     .balign 128
@@ -2877,7 +3063,7 @@
     /*
      * 64-bit SGET handler.
      */
-    # sget-wide vAA, field                 /* BBBB */
+    /* sget-wide vAA, field@BBBB */
     .extern artGet64StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2888,9 +3074,8 @@
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
     FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
-    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a1, t0)        # vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -2902,7 +3087,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetObjStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2913,14 +3098,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 1
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 1
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2933,7 +3117,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetBooleanStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2944,14 +3128,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2964,7 +3147,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetByteStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2975,14 +3158,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2995,7 +3177,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetCharStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -3006,14 +3188,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -3026,7 +3207,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetShortStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -3037,14 +3218,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -3056,7 +3236,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3077,7 +3257,7 @@
     /*
      * 64-bit SPUT handler.
      */
-    # sput-wide vAA, field                 /* BBBB */
+    /* sput-wide vAA, field@BBBB */
     .extern artSet64IndirectStaticFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -3123,7 +3303,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3148,7 +3328,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3173,7 +3353,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3198,7 +3378,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3221,8 +3401,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtual
     EXPORT_PC()
     move    a0, rSELF
@@ -3246,8 +3426,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuper
     EXPORT_PC()
     move    a0, rSELF
@@ -3271,8 +3451,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirect
     EXPORT_PC()
     move    a0, rSELF
@@ -3296,8 +3476,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStatic
     EXPORT_PC()
     move    a0, rSELF
@@ -3321,8 +3501,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterface
     EXPORT_PC()
     move    a0, rSELF
@@ -3344,7 +3524,7 @@
 /* File: mips/op_return_void_no_barrier.S */
     lw     ra, THREAD_FLAGS_OFFSET(rSELF)
     move   a0, rSELF
-    and    ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and    ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz   ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -3360,8 +3540,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3385,8 +3565,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuperRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3410,8 +3590,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirectRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3435,8 +3615,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStaticRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3460,8 +3640,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterfaceRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3506,11 +3686,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3520,8 +3700,7 @@
                                   #  optional op
     negu a0, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3531,11 +3710,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3545,8 +3724,7 @@
                                   #  optional op
     not a0, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3556,7 +3734,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3565,14 +3743,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     negu v0, a0                              #  optional op
     negu v1, a1; sltu a0, zero, v0; subu v1, v1, a0                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -3582,7 +3758,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3591,14 +3767,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     not a0, a0                              #  optional op
     not a1, a1                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3608,11 +3782,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3622,8 +3796,7 @@
                                   #  optional op
     addu a0, a0, 0x80000000                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3633,7 +3806,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3642,14 +3815,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vB/vB+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
                                   #  optional op
     addu a1, a1, 0x80000000                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3659,8 +3830,7 @@
 /* File: mips/unopWider.S */
     /*
      * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * that specifies an instruction that performs "result0/result1 = op a0".
      *
      * For: int-to-long
      */
@@ -3672,9 +3842,7 @@
                                   #  optional op
     sra a1, a0, 31                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vA/vA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 10-11 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3683,23 +3851,20 @@
 /* File: mips/op_int_to_float.S */
 /* File: mips/funop.S */
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * Generic 32-bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: int-to-float, float-to-int
+     * for: int-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.s.w fv0, fa0
-
-.Lop_int_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t1)         #  vA <- fv0
 
 
 /* ------------------------------ */
@@ -3708,11 +3873,10 @@
 /* File: mips/op_int_to_double.S */
 /* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -3720,11 +3884,8 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.d.w fv0, fa0
-
-.Lop_int_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -3741,120 +3902,157 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
     .balign 128
 .L_op_long_to_float: /* 0x85 */
 /* File: mips/op_long_to_float.S */
-/* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
-     *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * long-to-float: convert the 64-bit integer in vB/vB+1 to a float in vA.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)                #  fv0 <- 64 bits at &fp[B] (the long)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.l   fv0, fv0                     #  fv0 <- (float) long held in fv0
+#else
     LOAD64(rARG0, rARG1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     JAL(__floatdisf)
+#endif
 
-.Lop_long_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
 
 /* ------------------------------ */
     .balign 128
 .L_op_long_to_double: /* 0x86 */
 /* File: mips/op_long_to_double.S */
-/* File: mips/funopWide.S */
     /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
-     *
-     * long-to-double, double-to-long
+     * long-to-double: convert the 64-bit integer in vB/vB+1 to a double in vA/vA+1.
      */
     /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)                #  fv0 <- 64 bits at &fp[B] (the long)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.l   fv0, fv0                     #  fv0 <- (double) long held in fv0
+#else
     LOAD64(rARG0, rARG1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-                                  #  optional op
-    JAL(__floatdidf)                                 #  a0/a1 <- op, a2-a3 changed
+    JAL(__floatdidf)                       #  fv0 <- op(a0/a1), a2-a3 changed
+#endif
 
-.Lop_long_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vAA <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
-
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_float_to_int: /* 0x87 */
 /* File: mips/op_float_to_int.S */
-/* File: mips/funop.S */
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
-     * This could be a MIPS instruction or a function call.
+     * float-to-int
      *
-     * for: int-to-float, float-to-int
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b f2i_doconv
 
-.Lop_float_to_int_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
+    li        t0, INT_MIN_AS_FLOAT         #  t0 <- lower clip bound (raw bits)
+    mtc1      t0, fa1                      #  fa1 <- INT_MIN_AS_FLOAT
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.s  ft0, fa1, fa0                #  ft0 <- (INT_MIN <= vB), false for NaN
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
-
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0                #  ft0 <- (vB == vB), i.e. vB is not NaN
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
+#else
+    c.ole.s   fcc0, fa1, fa0               #  fcc0 <- (INT_MIN <= vB), false for NaN
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.s    fcc0, fa0, fa0               #  fcc0 <- (vB == vB), i.e. vB is not NaN
+    mtc1      zero, fa0                    #  fa0 <- 0.0f (kept only when vB is NaN)
+    movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
+#endif
+1:
+    trunc.w.s fa0, fa0                     #  fa0 <- (int) fa0, truncated toward zero
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_float_to_long: /* 0x88 */
 /* File: mips/op_float_to_long.S */
-/* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * float-to-long
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b f2l_doconv
 
-.Lop_float_to_long_set_vreg:
-    SET_VREG64(rRESULT0, rRESULT1, rOBJ)                             #  vA/vA+1 <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_FLOAT        #  t0 <- lower clip bound (raw bits)
+    mtc1      t0, fa1                      #  fa1 <- LONG_MIN_AS_FLOAT
+    cmp.le.s  ft0, fa1, fa0                #  ft0 <- (LONG_MIN <= vB), false for NaN
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0                #  ft0 <- (vB == vB), i.e. vB is not NaN
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
+1:
+    trunc.l.s fa0, fa0                     #  fa0 <- (long) fa0, truncated toward zero
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA/vA+1 <- result
+#else
+    c.eq.s    fcc0, fa0, fa0               #  fcc0 <- (vB == vB), false for NaN
+    li        rRESULT0, 0                  #  result <- 0, used if vB is NaN
+    li        rRESULT1, 0
+    bc1f      fcc0, .Lop_float_to_long_get_opcode
 
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1                      #  fa1 <- LONG_MIN_AS_FLOAT
+    c.ole.s   fcc0, fa0, fa1               #  fcc0 <- (vB <= LONG_MIN)
+    li        rRESULT1, LONG_MIN_HIGH      #  result <- LONG_MIN_HIGH:0, used if vB <= LONG_MIN
+    bc1t      fcc0, .Lop_float_to_long_get_opcode
+
+    neg.s     fa1, fa1                     #  fa1 <- -LONG_MIN_AS_FLOAT
+    c.ole.s   fcc0, fa1, fa0               #  fcc0 <- (-LONG_MIN <= vB)
+    nor       rRESULT0, rRESULT0, zero     #  result <- ~result (low word)
+    nor       rRESULT1, rRESULT1, zero     #  result <- ~result (high word)
+    bc1t      fcc0, .Lop_float_to_long_get_opcode
+
+    JAL(__fixsfdi)                         #  in range: convert via library call
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .Lop_float_to_long_set_vreg
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -3862,11 +4060,10 @@
 /* File: mips/op_float_to_double.S */
 /* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -3874,77 +4071,111 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.d.s fv0, fa0
-
-.Lop_float_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
     .balign 128
 .L_op_double_to_int: /* 0x8a */
 /* File: mips/op_double_to_int.S */
-/* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * double-to-int
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b d2i_doconv
 
-.Lop_double_to_int_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/*
- * Convert the double in a0/a1 to an int in a0.
- *
- * We have to clip values to int min/max per the specification.  The
- * expected common case is a "reasonable" value that converts directly
- * to modest integer.  The EABI convert function isn't doing this for us.
- */
+    li        t0, INT_MIN_AS_DOUBLE_HIGH   #  t0 <- high word of the lower clip bound
+    mtc1      zero, fa1                    #  low word of fa1 <- 0
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)        #  fa1 <- INT_MIN_AS_DOUBLE
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.d  ft0, fa1, fa0                #  ft0 <- (INT_MIN <= vB), false for NaN
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0                #  ft0 <- (vB == vB), i.e. vB is not NaN
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
+#else
+    c.ole.d   fcc0, fa1, fa0               #  fcc0 <- (INT_MIN <= vB), false for NaN
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.d    fcc0, fa0, fa0               #  fcc0 <- (vB == vB), i.e. vB is not NaN
+    mtc1      zero, fa0                    #  low word of fa0 <- 0
+    MOVE_TO_FPU_HIGH(zero, fa0, fa0f)      #  fa0 <- 0.0 (kept only when vB is NaN)
+    movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
+#endif
+1:
+    trunc.w.d fa0, fa0                     #  fa0 <- (int) fa0, truncated toward zero
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_double_to_long: /* 0x8b */
 /* File: mips/op_double_to_long.S */
-/* File: mips/funopWide.S */
     /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
+     * double-to-long
      *
-     * long-to-double, double-to-long
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to a modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-                                  #  optional op
-    b d2l_doconv                                 #  a0/a1 <- op, a2-a3 changed
 
-.Lop_double_to_long_set_vreg:
-    SET_VREG64(rRESULT0, rRESULT1, rOBJ)                             #  vAA <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH  #  t0 <- high word of the lower clip bound
+    mtc1      zero, fa1                    #  low word of fa1 <- 0
+    mthc1     t0, fa1                      #  fa1 <- LONG_MIN_AS_DOUBLE
+    cmp.le.d  ft0, fa1, fa0                #  ft0 <- (LONG_MIN <= vB), false for NaN
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0                #  ft0 <- (vB == vB), i.e. vB is not NaN
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
+1:
+    trunc.l.d fa0, fa0                     #  fa0 <- (long) fa0, truncated toward zero
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA/vA+1 <- result
+#else
+    c.eq.d    fcc0, fa0, fa0               #  fcc0 <- (vB == vB), false for NaN
+    li        rRESULT0, 0                  #  result <- 0, used if vB is NaN
+    li        rRESULT1, 0
+    bc1f      fcc0, .Lop_double_to_long_get_opcode
 
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)        #  fa1 <- LONG_MIN_AS_DOUBLE
+    c.ole.d   fcc0, fa0, fa1               #  fcc0 <- (vB <= LONG_MIN)
+    li        rRESULT1, LONG_MIN_HIGH      #  result <- LONG_MIN_HIGH:0, used if vB <= LONG_MIN
+    bc1t      fcc0, .Lop_double_to_long_get_opcode
+
+    neg.d     fa1, fa1                     #  fa1 <- -LONG_MIN_AS_DOUBLE
+    c.ole.d   fcc0, fa1, fa0               #  fcc0 <- (-LONG_MIN <= vB)
+    nor       rRESULT0, rRESULT0, zero     #  result <- ~result (low word)
+    nor       rRESULT1, rRESULT1, zero     #  result <- ~result (high word)
+    bc1t      fcc0, .Lop_double_to_long_get_opcode
+
+    JAL(__fixdfdi)                         #  in range: convert via library call
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .Lop_double_to_long_set_vreg
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -3952,28 +4183,20 @@
 /* File: mips/op_double_to_float.S */
 /* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * Generic 64bit-to-32bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * For: double-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.s.d fv0, fa0
-
-.Lop_double_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
 
 
 /* ------------------------------ */
@@ -3983,22 +4206,21 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
     GET_OPA4(t0)                           #  t0 <- A+
     GET_VREG(a0, a3)                       #  a0 <- vB
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    sll a0, a0, 24                              #  optional op
-    sra a0, a0, 24                                 #  a0 <- op, a0-a3 changed
+                                  #  optional op
+    SEB(a0, a0)                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4008,11 +4230,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -4022,8 +4244,7 @@
                                   #  optional op
     and a0, 0xffff                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4033,22 +4254,21 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
     GET_OPA4(t0)                           #  t0 <- A+
     GET_VREG(a0, a3)                       #  a0 <- vB
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    sll a0, 16                              #  optional op
-    sra a0, 16                                 #  a0 <- op, a0-a3 changed
+                                  #  optional op
+    SEH(a0, a0)                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4087,7 +4307,6 @@
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4126,7 +4345,6 @@
     subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4165,7 +4383,6 @@
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4205,7 +4422,6 @@
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #else
 /* File: mips/binop.S */
@@ -4240,7 +4456,6 @@
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #endif
 
@@ -4281,7 +4496,6 @@
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #else
 /* File: mips/binop.S */
@@ -4316,7 +4530,6 @@
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #endif
 
@@ -4356,7 +4569,6 @@
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4395,7 +4607,6 @@
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4434,7 +4645,6 @@
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4473,7 +4683,6 @@
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4512,7 +4721,6 @@
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4551,7 +4759,6 @@
     srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4571,10 +4778,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4600,7 +4807,6 @@
     addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4619,10 +4825,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4648,7 +4854,6 @@
     subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4702,10 +4907,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4731,7 +4936,6 @@
     JAL(__divdi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4743,10 +4947,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4772,7 +4976,6 @@
     JAL(__moddi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4784,10 +4987,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4813,7 +5016,6 @@
     and a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4825,10 +5027,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4854,7 +5056,6 @@
     or a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4866,10 +5067,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4895,7 +5096,6 @@
     xor a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4928,7 +5128,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -4959,7 +5159,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v0
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -5006,7 +5206,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5014,9 +5214,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     add.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5032,7 +5231,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5040,9 +5239,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     sub.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5058,7 +5256,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5066,9 +5264,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     mul.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5084,7 +5281,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5092,9 +5289,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     div.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5110,7 +5306,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5118,9 +5314,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     JAL(fmodf)                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5129,8 +5324,8 @@
 /* File: mips/op_add_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5139,7 +5334,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5149,8 +5344,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     add.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_add_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5159,8 +5354,8 @@
 /* File: mips/op_sub_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5169,7 +5364,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5179,8 +5374,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     sub.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_sub_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5189,8 +5384,8 @@
 /* File: mips/op_mul_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5199,7 +5394,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5209,8 +5404,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     mul.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_mul_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5219,8 +5414,8 @@
 /* File: mips/op_div_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5229,7 +5424,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5239,8 +5434,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     div.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_div_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5249,8 +5444,8 @@
 /* File: mips/op_rem_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5259,7 +5454,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5269,8 +5464,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     JAL(fmod)
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_rem_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5304,8 +5499,7 @@
                                   #  optional op
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5339,8 +5533,7 @@
                                   #  optional op
     subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5374,8 +5567,7 @@
                                   #  optional op
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5410,8 +5602,7 @@
                                   #  optional op
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binop2addr.S */
@@ -5441,8 +5632,7 @@
     div zero, a0, a1                              #  optional op
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -5478,8 +5668,7 @@
                                   #  optional op
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binop2addr.S */
@@ -5509,8 +5698,7 @@
     div zero, a0, a1                              #  optional op
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -5545,8 +5733,7 @@
                                   #  optional op
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5580,8 +5767,7 @@
                                   #  optional op
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5615,8 +5801,7 @@
                                   #  optional op
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5650,8 +5835,7 @@
                                   #  optional op
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5685,8 +5869,7 @@
                                   #  optional op
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5720,8 +5903,7 @@
                                   #  optional op
     srl a0, a0, a1                                  #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5736,22 +5918,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5761,9 +5942,7 @@
     addu v0, a2, a0                              #  optional op
     addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5778,22 +5957,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5803,9 +5981,7 @@
     subu v0, a0, a2                              #  optional op
     subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5840,9 +6016,7 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    # vAA <- v0 (low)
-    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, rOBJ, t1)      #  vA/vA+1 <- v0(low)/v1(high)
 
 /* ------------------------------ */
     .balign 128
@@ -5853,22 +6027,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 1
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5878,9 +6051,7 @@
                                   #  optional op
     JAL(__divdi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5892,22 +6063,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 1
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5917,9 +6087,7 @@
                                   #  optional op
     JAL(__moddi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5931,22 +6099,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5956,9 +6123,7 @@
     and a0, a0, a2                              #  optional op
     and a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -5970,22 +6135,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5995,9 +6159,7 @@
     or a0, a0, a2                              #  optional op
     or a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -6009,22 +6171,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -6034,9 +6195,7 @@
     xor a0, a0, a2                              #  optional op
     xor a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -6052,7 +6211,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
-    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -6065,7 +6224,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6080,7 +6239,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
 
@@ -6092,7 +6251,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6107,7 +6266,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -6120,7 +6279,7 @@
     sll       a1, 1
     sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
     or        v0, a1                       #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6129,23 +6288,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     add.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6155,23 +6313,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     sub.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6181,23 +6338,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     mul.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6207,23 +6363,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     div.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6233,23 +6388,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     JAL(fmodf)
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6258,12 +6412,13 @@
 /* File: mips/op_add_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6275,9 +6430,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     add.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6286,12 +6440,13 @@
 /* File: mips/op_sub_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6303,9 +6458,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     sub.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6314,12 +6468,13 @@
 /* File: mips/op_mul_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6331,9 +6486,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     mul.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6342,12 +6496,13 @@
 /* File: mips/op_div_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6359,9 +6514,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     div.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6370,12 +6524,13 @@
 /* File: mips/op_rem_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6387,9 +6542,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     JAL(fmod)
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6409,12 +6563,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6424,8 +6577,7 @@
                                   #  optional op
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6446,12 +6598,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6461,8 +6612,7 @@
                                   #  optional op
     subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6482,12 +6632,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6497,8 +6646,7 @@
                                   #  optional op
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6519,12 +6667,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6534,8 +6681,7 @@
                                   #  optional op
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binopLit16.S */
@@ -6551,12 +6697,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6566,8 +6711,7 @@
     div zero, a0, a1                              #  optional op
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -6589,12 +6733,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6604,8 +6747,7 @@
                                   #  optional op
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binopLit16.S */
@@ -6621,12 +6763,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6636,8 +6777,7 @@
     div zero, a0, a1                              #  optional op
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -6658,12 +6798,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6673,8 +6812,7 @@
                                   #  optional op
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6694,12 +6832,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6709,8 +6846,7 @@
                                   #  optional op
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6730,12 +6866,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6745,8 +6880,7 @@
                                   #  optional op
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6767,7 +6901,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6783,7 +6917,6 @@
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6804,7 +6937,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6820,7 +6953,6 @@
     subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6841,7 +6973,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6857,7 +6989,6 @@
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6879,7 +7010,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6895,7 +7026,6 @@
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #else
 /* File: mips/binopLit8.S */
@@ -6912,7 +7042,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6928,7 +7058,6 @@
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #endif
 
@@ -6951,7 +7080,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6967,7 +7096,6 @@
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #else
 /* File: mips/binopLit8.S */
@@ -6984,7 +7112,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7000,7 +7128,6 @@
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #endif
 
@@ -7022,7 +7149,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7038,7 +7165,6 @@
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7059,7 +7185,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7075,7 +7201,6 @@
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7096,7 +7221,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7112,7 +7237,6 @@
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7133,7 +7257,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7149,7 +7273,6 @@
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7170,7 +7293,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7186,7 +7309,6 @@
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7207,7 +7329,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7223,7 +7345,6 @@
     srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7231,7 +7352,7 @@
 .L_op_iget_quick: /* 0xe3 */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7248,7 +7369,7 @@
     .balign 128
 .L_op_iget_wide_quick: /* 0xe4 */
 /* File: mips/op_iget_wide_quick.S */
-    # iget-wide-quick vA, vB, offset       /* CCCC */
+    /* iget-wide-quick vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7259,8 +7380,7 @@
     LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -7277,17 +7397,16 @@
     GET_OPA4(a2)                           #  a2<- A+
     PREFETCH_INST(2)                       #  load rINST
     bnez a3, MterpPossibleException        #  bail out
-    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       #  fp[A] <- v0
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_quick: /* 0xe6 */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7296,15 +7415,16 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sw    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_wide_quick: /* 0xe7 */
 /* File: mips/op_iput_wide_quick.S */
-    # iput-wide-quick vA, vB, offset       /* CCCC */
+    /* iput-wide-quick vA, vB, offset@CCCC */
     GET_OPA4(a0)                           #  a0 <- A(+)
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
@@ -7315,16 +7435,17 @@
     FETCH(a3, 1)                           #  a3 <- field byte offset
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      a2, a2, a3                   #  obj.field (64 bits, aligned) <- a0/a1
-    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    GET_OPCODE_TARGET(t0)
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_object_quick: /* 0xe8 */
 /* File: mips/op_iput_object_quick.S */
     /* For: iput-object-quick */
-    # op vA, vB, offset                 /* CCCC */
+    /* op vA, vB, offset@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
@@ -7343,8 +7464,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuick
     EXPORT_PC()
     move    a0, rSELF
@@ -7368,8 +7489,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuickRange
     EXPORT_PC()
     move    a0, rSELF
@@ -7391,7 +7512,7 @@
 /* File: mips/op_iput_boolean_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7400,9 +7521,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sb    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7411,7 +7533,7 @@
 /* File: mips/op_iput_byte_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7420,9 +7542,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sb    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7431,7 +7554,7 @@
 /* File: mips/op_iput_char_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7440,9 +7563,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sh    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7451,7 +7575,7 @@
 /* File: mips/op_iput_short_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7460,9 +7584,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sh    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7471,7 +7596,7 @@
 /* File: mips/op_iget_boolean_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7491,7 +7616,7 @@
 /* File: mips/op_iget_byte_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7511,7 +7636,7 @@
 /* File: mips/op_iget_char_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7531,7 +7656,7 @@
 /* File: mips/op_iget_short_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7694,264 +7819,29 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_cmpl_float */
-
-.Lop_cmpl_float_nan:
-    li rTEMP, -1
-
-.Lop_cmpl_float_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpg_float */
-
-.Lop_cmpg_float_nan:
-    li rTEMP, 1
-
-.Lop_cmpg_float_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpl_double */
-
-.Lop_cmpl_double_nan:
-    li rTEMP, -1
-
-.Lop_cmpl_double_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpg_double */
-
-.Lop_cmpg_double_nan:
-    li rTEMP, 1
-
-.Lop_cmpg_double_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_float_to_int */
-
-/*
- * Not an entry point as it is used only once !!
- */
-f2i_doconv:
-#ifdef MIPS32REVGE6
-    l.s       fa1, .LFLOAT_TO_INT_max
-    cmp.le.s  ft2, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    cmp.le.s  ft2, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-#else
-    l.s       fa1, .LFLOAT_TO_INT_max
-    c.ole.s   fcc0, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1t      .Lop_float_to_int_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    c.ole.s   fcc0, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1t      .Lop_float_to_int_set_vreg_f
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .Lop_float_to_int_set_vreg_f
-#endif
-
-    trunc.w.s  fv0, fa0
-    b         .Lop_float_to_int_set_vreg_f
-
-.LFLOAT_TO_INT_max:
-    .word 0x4f000000
-.LFLOAT_TO_INT_min:
-    .word 0xcf000000
-.LFLOAT_TO_INT_ret_max:
-    .word 0x7fffffff
-.LFLOAT_TO_INT_ret_min:
-    .word 0x80000000
-
 /* continuation for op_float_to_long */
 
-f2l_doconv:
-#ifdef MIPS32REVGE6
-    l.s       fa1, .LLONG_TO_max
-    cmp.le.s  ft2, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    cmp.le.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-#else
-    l.s       fa1, .LLONG_TO_max
-    c.ole.s   fcc0, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1t      .Lop_float_to_long_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    c.ole.s   fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1t      .Lop_float_to_long_set_vreg
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .Lop_float_to_long_set_vreg
+#ifndef MIPS32REVGE6
+.Lop_float_to_long_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.Lop_float_to_long_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
 
-    JAL(__fixsfdi)
-
-    b         .Lop_float_to_long_set_vreg
-
-.LLONG_TO_max:
-    .word 0x5f000000
-
-.LLONG_TO_min:
-    .word 0xdf000000
-
-/* continuation for op_double_to_int */
-
-d2i_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-#else
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1t      .Lop_double_to_int_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1t      .Lop_double_to_int_set_vreg_f
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .Lop_double_to_int_set_vreg_f
-#endif
-
-    trunc.w.d  fv0, fa0
-    b         .Lop_double_to_int_set_vreg_f
-
-.LDOUBLE_TO_INT_max:
-    .dword 0x41dfffffffc00000
-.LDOUBLE_TO_INT_min:
-    .dword 0xc1e0000000000000              #  minint, as a double (high word)
-.LDOUBLE_TO_INT_maxret:
-    .word 0x7fffffff
-.LDOUBLE_TO_INT_minret:
-    .word 0x80000000
-
 /* continuation for op_double_to_long */
 
-d2l_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.le.d  ft2, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-#else
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .Lop_double_to_long_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .Lop_double_to_long_set_vreg
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .Lop_double_to_long_set_vreg
+#ifndef MIPS32REVGE6
+.Lop_double_to_long_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.Lop_double_to_long_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
-    JAL(__fixdfdi)
-    b         .Lop_double_to_long_set_vreg
-
-.LDOUBLE_TO_LONG_max:
-    .dword 0x43e0000000000000              #  maxlong, as a double (high word)
-.LDOUBLE_TO_LONG_min:
-    .dword 0xc3e0000000000000              #  minlong, as a double (high word)
-.LDOUBLE_TO_LONG_ret_max:
-    .dword 0x7fffffffffffffff
-.LDOUBLE_TO_LONG_ret_min:
-    .dword 0x8000000000000000
 
 /* continuation for op_mul_long */
 
 .Lop_mul_long_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a0, t0)        #  vAA/vAA+1 <- v0(low)/v1(high)
 
 /* continuation for op_shl_long */
 
@@ -7969,51 +7859,21 @@
 .Lop_ushr_long_finish:
     SET_VREG64_GOTO(v1, zero, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
 
-/* continuation for op_add_double */
-
-.Lop_add_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_sub_double */
-
-.Lop_sub_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_mul_double */
-
-.Lop_mul_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_div_double */
-
-.Lop_div_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_rem_double */
-
-.Lop_rem_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_shl_long_2addr */
 
 .Lop_shl_long_2addr_finish:
-    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
 
 /* continuation for op_shr_long_2addr */
 
 .Lop_shr_long_2addr_finish:
     sra     a3, a1, 31                     #  a3<- sign(ah)
-    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
 
 /* continuation for op_ushr_long_2addr */
 
 .Lop_ushr_long_2addr_finish:
-    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vA/vA+1 <- rlo/rhi
 
     .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
     .global artMterpAsmSisterEnd
@@ -12791,7 +12651,7 @@
     REFRESH_IBASE()
     addu    a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 143aeb0..037787f 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -637,7 +637,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -659,7 +659,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -681,7 +681,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -705,7 +705,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -3121,7 +3121,7 @@
     .extern MterpSuspendCheck
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -12179,7 +12179,7 @@
     REFRESH_IBASE
     daddu   a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnezc   ra, .L_suspend_request_pending
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
@@ -12296,7 +12296,7 @@
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     sd      a0, 0(a2)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, check2
     jal     MterpSuspendCheck                       # (self)
 check2:
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index d676fda..695d1e4 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -612,7 +612,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -634,7 +634,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -654,7 +654,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -677,7 +677,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -3104,7 +3104,7 @@
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86/op_return_void_no_barrier.S */
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -12678,7 +12678,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     leal    (rPC, rINST, 2), rPC
     FETCH_INST
     jnz     .L_suspend_request_pending
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index df88499..2eab58c 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -587,7 +587,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -607,7 +607,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -625,7 +625,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -646,7 +646,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -2972,7 +2972,7 @@
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86_64/op_return_void_no_barrier.S */
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -11915,7 +11915,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movq    rSELF, %rax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
     leaq    (rPC, rINSTq, 2), rPC
     FETCH_INST
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index e8c8ca8..088cb12 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -167,7 +167,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     leal    (rPC, rINST, 2), rPC
     FETCH_INST
     jnz     .L_suspend_request_pending
diff --git a/runtime/interpreter/mterp/x86/op_return.S b/runtime/interpreter/mterp/x86/op_return.S
index 8e3cfad..a8ebbed 100644
--- a/runtime/interpreter/mterp/x86/op_return.S
+++ b/runtime/interpreter/mterp/x86/op_return.S
@@ -7,7 +7,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_return_void.S b/runtime/interpreter/mterp/x86/op_return_void.S
index a14a4f6..d9eddf3 100644
--- a/runtime/interpreter/mterp/x86/op_return_void.S
+++ b/runtime/interpreter/mterp/x86/op_return_void.S
@@ -1,7 +1,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
index 1d0e933..2fbda6b 100644
--- a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
@@ -1,5 +1,5 @@
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_return_wide.S b/runtime/interpreter/mterp/x86/op_return_wide.S
index 7d1850a..5fff626 100644
--- a/runtime/interpreter/mterp/x86/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86/op_return_wide.S
@@ -5,7 +5,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86_64/footer.S b/runtime/interpreter/mterp/x86_64/footer.S
index f78f163..ed5e5ea 100644
--- a/runtime/interpreter/mterp/x86_64/footer.S
+++ b/runtime/interpreter/mterp/x86_64/footer.S
@@ -151,7 +151,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movq    rSELF, %rax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
     leaq    (rPC, rINSTq, 2), rPC
     FETCH_INST
diff --git a/runtime/interpreter/mterp/x86_64/op_return.S b/runtime/interpreter/mterp/x86_64/op_return.S
index 07e0e53..8cb6cba 100644
--- a/runtime/interpreter/mterp/x86_64/op_return.S
+++ b/runtime/interpreter/mterp/x86_64/op_return.S
@@ -7,7 +7,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void.S b/runtime/interpreter/mterp/x86_64/op_return_void.S
index 6a12df3..ba68e7e 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void.S
@@ -1,7 +1,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
index 822b2e8..6799da1 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
@@ -1,5 +1,5 @@
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_return_wide.S b/runtime/interpreter/mterp/x86_64/op_return_wide.S
index 288eb96..d6d6d1b 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_wide.S
@@ -5,7 +5,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 6bf7e15..a5b1038 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -424,7 +424,7 @@
 
   std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(jar_file.c_str(), error_msg));
   if (zip_archive == nullptr) {
-    return nullptr;;
+    return nullptr;
   }
   std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(entry_name, error_msg));
   if (zip_entry == nullptr) {
@@ -1097,10 +1097,12 @@
     return;
   }
   DCHECK_GE(start, 0);
-  DCHECK_GE(end, string->GetLength());
+  DCHECK_LE(start, end);
+  DCHECK_LE(end, string->GetLength());
   StackHandleScope<1> hs(self);
   Handle<mirror::CharArray> h_char_array(
       hs.NewHandle(shadow_frame->GetVRegReference(arg_offset + 3)->AsCharArray()));
+  DCHECK_GE(index, 0);
   DCHECK_LE(index, h_char_array->GetLength());
   DCHECK_LE(end - start, h_char_array->GetLength() - index);
   string->GetChars(start, end, h_char_array, index);
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 8e76aeb..caf705a 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -562,6 +562,9 @@
   }
   MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
   IndirectRef ref = weak_globals_.Add(kIRTFirstSegment, obj);
@@ -648,7 +651,6 @@
 }
 
 void JavaVMExt::BroadcastForNewWeakGlobals() {
-  CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   weak_globals_add_condition_.Broadcast(self);
@@ -694,6 +696,9 @@
     Locks::jni_weak_globals_lock_->AssertHeld(self);
   }
   while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
   return weak_globals_.Get(ref);
@@ -716,6 +721,9 @@
   DCHECK_EQ(IndirectReferenceTable::GetIndirectRefKind(ref), kWeakGlobal);
   MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
   // When just checking a weak ref has been cleared, avoid triggering the read barrier in decode
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 9e37f11..7374920 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -136,7 +136,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::jni_weak_globals_lock_);
   void BroadcastForNewWeakGlobals()
-      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::jni_weak_globals_lock_);
 
   jobject AddGlobalRef(Thread* self, ObjPtr<mirror::Object> obj)
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 85bfd17..5574a11 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -621,8 +621,8 @@
   Thread* const self = Thread::Current();
   self->AssertThreadSuspensionIsAllowable();
   CHECK(pReq != nullptr);
+  CHECK_EQ(threadId, Dbg::GetThreadSelfId()) << "Only the current thread can suspend itself";
   /* send request and possibly suspend ourselves */
-  JDWP::ObjectId thread_self_id = Dbg::GetThreadSelfId();
   ScopedThreadSuspension sts(self, kWaitingForDebuggerSend);
   if (suspend_policy != SP_NONE) {
     AcquireJdwpTokenForEvent(threadId);
@@ -631,7 +631,7 @@
   {
     // Before suspending, we change our state to kSuspended so the debugger sees us as RUNNING.
     ScopedThreadStateChange stsc(self, kSuspended);
-    SuspendByPolicy(suspend_policy, thread_self_id);
+    SuspendByPolicy(suspend_policy, threadId);
   }
 }
 
@@ -658,13 +658,10 @@
 }
 
 void JdwpState::AcquireJdwpTokenForEvent(ObjectId threadId) {
-  CHECK_NE(Thread::Current(), GetDebugThread()) << "Expected event thread";
-  CHECK_NE(debug_thread_id_, threadId) << "Not expected debug thread";
   SetWaitForJdwpToken(threadId);
 }
 
 void JdwpState::ReleaseJdwpTokenForEvent() {
-  CHECK_NE(Thread::Current(), GetDebugThread()) << "Expected event thread";
   ClearWaitForJdwpToken();
 }
 
@@ -685,23 +682,28 @@
   /* this is held for very brief periods; contention is unlikely */
   MutexLock mu(self, jdwp_token_lock_);
 
-  CHECK_NE(jdwp_token_owner_thread_id_, threadId) << "Thread is already holding event thread lock";
+  if (jdwp_token_owner_thread_id_ == threadId) {
+    // Only the debugger thread may already hold the event token. For instance, it may trigger
+    // a CLASS_PREPARE event while processing a command that initializes a class.
+    CHECK_EQ(threadId, debug_thread_id_) << "Non-debugger thread is already holding event token";
+  } else {
+    /*
+     * If another thread is already doing stuff, wait for it.  This can
+     * go to sleep indefinitely.
+     */
 
-  /*
-   * If another thread is already doing stuff, wait for it.  This can
-   * go to sleep indefinitely.
-   */
-  while (jdwp_token_owner_thread_id_ != 0) {
-    VLOG(jdwp) << StringPrintf("event in progress (%#" PRIx64 "), %#" PRIx64 " sleeping",
-                               jdwp_token_owner_thread_id_, threadId);
-    waited = true;
-    jdwp_token_cond_.Wait(self);
-  }
+    while (jdwp_token_owner_thread_id_ != 0) {
+      VLOG(jdwp) << StringPrintf("event in progress (%#" PRIx64 "), %#" PRIx64 " sleeping",
+                                 jdwp_token_owner_thread_id_, threadId);
+      waited = true;
+      jdwp_token_cond_.Wait(self);
+    }
 
-  if (waited || threadId != debug_thread_id_) {
-    VLOG(jdwp) << StringPrintf("event token grabbed (%#" PRIx64 ")", threadId);
+    if (waited || threadId != debug_thread_id_) {
+      VLOG(jdwp) << StringPrintf("event token grabbed (%#" PRIx64 ")", threadId);
+    }
+    jdwp_token_owner_thread_id_ = threadId;
   }
-  jdwp_token_owner_thread_id_ = threadId;
 }
 
 /*
@@ -781,7 +783,7 @@
   SendRequestAndPossiblySuspend(pReq, suspend_policy, threadId);
 }
 
-static void LogMatchingEventsAndThread(const std::vector<JdwpEvent*> match_list,
+static void LogMatchingEventsAndThread(const std::vector<JdwpEvent*>& match_list,
                                        ObjectId thread_id)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   for (size_t i = 0, e = match_list.size(); i < e; ++i) {
@@ -1224,14 +1226,15 @@
     VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
   }
 
-  if (thread_id == debug_thread_id_) {
+  ObjectId reported_thread_id = thread_id;
+  if (reported_thread_id == debug_thread_id_) {
     /*
      * JDWP says that, for a class prep in the debugger thread, we
      * should set thread to null and if any threads were supposed
      * to be suspended then we suspend all other threads.
      */
     VLOG(jdwp) << "  NOTE: class prepare in debugger thread!";
-    thread_id = 0;
+    reported_thread_id = 0;
     if (suspend_policy == SP_EVENT_THREAD) {
       suspend_policy = SP_ALL;
     }
@@ -1244,7 +1247,7 @@
   for (const JdwpEvent* pEvent : match_list) {
     expandBufAdd1(pReq, pEvent->eventKind);
     expandBufAdd4BE(pReq, pEvent->requestId);
-    expandBufAddObjectId(pReq, thread_id);
+    expandBufAddObjectId(pReq, reported_thread_id);
     expandBufAdd1(pReq, tag);
     expandBufAddRefTypeId(pReq, class_id);
     expandBufAddUtf8String(pReq, signature);
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 4c10063..dac2e60 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -114,7 +114,7 @@
   } else {
     jit_options->invoke_transition_weight_ = std::max(
         jit_options->warmup_threshold_ / Jit::kDefaultInvokeTransitionWeightRatio,
-        static_cast<size_t>(1));;
+        static_cast<size_t>(1));
   }
 
   return jit_options;
@@ -274,6 +274,15 @@
               << ArtMethod::PrettyMethod(method_to_compile)
               << " osr=" << std::boolalpha << osr;
   }
+  if (kIsDebugBuild) {
+    if (self->IsExceptionPending()) {
+      mirror::Throwable* exception = self->GetException();
+      LOG(FATAL) << "No pending exception expected after compiling "
+                 << ArtMethod::PrettyMethod(method)
+                 << ": "
+                 << exception->Dump();
+    }
+  }
   return success;
 }
 
@@ -701,5 +710,24 @@
   }
 }
 
+ScopedJitSuspend::ScopedJitSuspend() {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  was_on_ = (jit != nullptr) && (jit->GetThreadPool() != nullptr);
+  if (was_on_) {
+    Thread* self = Thread::Current();
+    jit->WaitForCompilationToFinish(self);
+    jit->GetThreadPool()->StopWorkers(self);
+    jit->WaitForCompilationToFinish(self);
+  }
+}
+
+ScopedJitSuspend::~ScopedJitSuspend() {
+  if (was_on_) {
+    DCHECK(Runtime::Current()->GetJit() != nullptr);
+    DCHECK(Runtime::Current()->GetJit()->GetThreadPool() != nullptr);
+    Runtime::Current()->GetJit()->GetThreadPool()->StartWorkers(Thread::Current());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index a782437..a230c78 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -175,6 +175,10 @@
 
   static bool LoadCompilerLibrary(std::string* error_msg);
 
+  ThreadPool* GetThreadPool() const {
+    return thread_pool_.get();
+  }
+
  private:
   Jit();
 
@@ -278,6 +282,16 @@
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
 
+// Helper class to stop the JIT for a given scope. This will wait for the JIT to quiesce.
+class ScopedJitSuspend {
+ public:
+  ScopedJitSuspend();
+  ~ScopedJitSuspend();
+
+ private:
+  bool was_on_;
+};
+
 }  // namespace jit
 }  // namespace art
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index a26d850..19f3099 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -23,6 +23,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
+#include "cha.h"
 #include "debugger_interface.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
@@ -80,8 +81,18 @@
 
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
+  // Map in low 4gb to simplify accessing root tables for x86_64.
+  // We could do PC-relative addressing to avoid this problem, but that
+  // would require reserving code and data area before submitting, which
+  // means more windows for the code memory to be RWX.
   MemMap* data_map = MemMap::MapAnonymous(
-      "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem);
+      "data-code-cache", nullptr,
+      max_capacity,
+      kProtAll,
+      /* low_4gb */ true,
+      /* reuse */ false,
+      &error_str,
+      use_ashmem);
   if (data_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
@@ -123,7 +134,7 @@
                            size_t max_capacity,
                            bool garbage_collect_code)
     : lock_("Jit code cache", kJitCodeCacheLock),
-      lock_cond_("Jit code cache variable", lock_),
+      lock_cond_("Jit code cache condition variable", lock_),
       collection_in_progress_(false),
       code_map_(code_map),
       data_map_(data_map),
@@ -142,7 +153,9 @@
       number_of_collections_(0),
       histogram_stack_map_memory_use_("Memory used for stack maps", 16),
       histogram_code_memory_use_("Memory used for compiled code", 16),
-      histogram_profiling_info_memory_use_("Memory used for profiling info", 16) {
+      histogram_profiling_info_memory_use_("Memory used for profiling info", 16),
+      is_weak_access_enabled_(true),
+      inline_cache_cond_("Jit inline cache condition variable", lock_) {
 
   DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
@@ -197,34 +210,46 @@
 
 uint8_t* JitCodeCache::CommitCode(Thread* self,
                                   ArtMethod* method,
-                                  const uint8_t* vmap_table,
+                                  uint8_t* stack_map,
+                                  uint8_t* roots_data,
                                   size_t frame_size_in_bytes,
                                   size_t core_spill_mask,
                                   size_t fp_spill_mask,
                                   const uint8_t* code,
                                   size_t code_size,
-                                  bool osr) {
+                                  bool osr,
+                                  Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                  bool has_should_deoptimize_flag,
+                                  const ArenaSet<ArtMethod*>& cha_single_implementation_list) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
-                                       vmap_table,
+                                       stack_map,
+                                       roots_data,
                                        frame_size_in_bytes,
                                        core_spill_mask,
                                        fp_spill_mask,
                                        code,
                                        code_size,
-                                       osr);
+                                       osr,
+                                       roots,
+                                       has_should_deoptimize_flag,
+                                       cha_single_implementation_list);
   if (result == nullptr) {
     // Retry.
     GarbageCollectCache(self);
     result = CommitCodeInternal(self,
                                 method,
-                                vmap_table,
+                                stack_map,
+                                roots_data,
                                 frame_size_in_bytes,
                                 core_spill_mask,
                                 fp_spill_mask,
                                 code,
                                 code_size,
-                                osr);
+                                osr,
+                                roots,
+                                has_should_deoptimize_flag,
+                                cha_single_implementation_list);
   }
   return result;
 }
@@ -243,82 +268,247 @@
   return reinterpret_cast<uintptr_t>(code) - RoundUp(sizeof(OatQuickMethodHeader), alignment);
 }
 
-void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
+static uint32_t ComputeRootTableSize(uint32_t number_of_roots) {
+  return sizeof(uint32_t) + number_of_roots * sizeof(GcRoot<mirror::Object>);
+}
+
+static uint32_t GetNumberOfRoots(const uint8_t* stack_map) {
+  // The length of the table is stored just before the stack map (and therefore at the end of
+  // the table itself), in order to be able to fetch it from a `stack_map` pointer.
+  return reinterpret_cast<const uint32_t*>(stack_map)[-1];
+}
+
+static void FillRootTableLength(uint8_t* roots_data, uint32_t length) {
+  // Store the length of the table at the end. This will allow fetching it from a `stack_map`
+  // pointer.
+  reinterpret_cast<uint32_t*>(roots_data)[length] = length;
+}
+
+static void FillRootTable(uint8_t* roots_data, Handle<mirror::ObjectArray<mirror::Object>> roots)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  GcRoot<mirror::Object>* gc_roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
+  const uint32_t length = roots->GetLength();
+  // Put all roots in `roots_data`.
+  for (uint32_t i = 0; i < length; ++i) {
+    ObjPtr<mirror::Object> object = roots->Get(i);
+    if (kIsDebugBuild) {
+      // Ensure the string is strongly interned. b/32995596
+      CHECK(object->IsString());
+      ObjPtr<mirror::String> str = reinterpret_cast<mirror::String*>(object.Ptr());
+      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+      CHECK(class_linker->GetInternTable()->LookupStrong(Thread::Current(), str) != nullptr);
+    }
+    gc_roots[i] = GcRoot<mirror::Object>(object);
+  }
+  FillRootTableLength(roots_data, length);
+}
+
+static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) {
+  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+  uint8_t* data = method_header->GetOptimizedCodeInfoPtr();
+  uint32_t roots = GetNumberOfRoots(data);
+  if (number_of_roots != nullptr) {
+    *number_of_roots = roots;
+  }
+  return data - ComputeRootTableSize(roots);
+}
+
+void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (const auto& entry : method_code_map_) {
+    uint32_t number_of_roots = 0;
+    uint8_t* roots_data = GetRootTable(entry.first, &number_of_roots);
+    GcRoot<mirror::Object>* roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
+    for (uint32_t i = 0; i < number_of_roots; ++i) {
+      // This does not need a read barrier because this is called by GC.
+      mirror::Object* object = roots[i].Read<kWithoutReadBarrier>();
+      DCHECK(object != nullptr);
+      mirror::Object* new_object = visitor->IsMarked(object);
+      // We know the string is marked because it's a strongly-interned string that
+      // is always alive. The IsMarked implementation of the CMS collector returns
+      // null for newly allocated objects, but we know those haven't moved. Therefore,
+      // only update the entry if we get a different non-null string.
+      // TODO: Do not use IsMarked for j.l.Class, and adjust once we move this method
+      // out of the weak access/creation pause. b/32167580
+      if (new_object != nullptr && new_object != object) {
+        DCHECK(new_object->IsString());
+        roots[i] = GcRoot<mirror::Object>(new_object);
+      }
+    }
+  }
+  // Walk over inline caches to clear entries containing unloaded classes.
+  for (ProfilingInfo* info : profiling_infos_) {
+    for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
+      InlineCache* cache = &info->cache_[i];
+      for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
+        // This does not need a read barrier because this is called by GC.
+        mirror::Class* cls = cache->classes_[j].Read<kWithoutReadBarrier>();
+        if (cls != nullptr) {
+          // Look at the classloader of the class to know if it has been
+          // unloaded.
+          // This does not need a read barrier because this is called by GC.
+          mirror::Object* class_loader =
+              cls->GetClassLoader<kDefaultVerifyFlags, kWithoutReadBarrier>();
+          if (class_loader == nullptr || visitor->IsMarked(class_loader) != nullptr) {
+            // The class loader is live, update the entry if the class has moved.
+            mirror::Class* new_cls = down_cast<mirror::Class*>(visitor->IsMarked(cls));
+            // Note that new_cls can be null for CMS and newly allocated objects.
+            if (new_cls != nullptr && new_cls != cls) {
+              cache->classes_[j] = GcRoot<mirror::Class>(new_cls);
+            }
+          } else {
+            // The class loader is not live, clear the entry.
+            cache->classes_[j] = GcRoot<mirror::Class>(nullptr);
+          }
+        }
+      }
+    }
+  }
+}
+
+void JitCodeCache::FreeCode(const void* code_ptr) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
-  const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-
-  // Use the offset directly to prevent sanity check that the method is
-  // compiled with optimizing.
-  // TODO(ngeoffray): Clean up.
-  if (method_header->vmap_table_offset_ != 0) {
-    const uint8_t* data = method_header->code_ - method_header->vmap_table_offset_;
-    FreeData(const_cast<uint8_t*>(data));
-  }
+  FreeData(GetRootTable(code_ptr));
   FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
+void JitCodeCache::FreeAllMethodHeaders(
+    const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+  {
+    MutexLock mu(Thread::Current(), *Locks::cha_lock_);
+    Runtime::Current()->GetClassHierarchyAnalysis()
+        ->RemoveDependentsWithMethodHeaders(method_headers);
+  }
+
+  // We need to remove entries in method_headers from CHA dependencies
+  // first since once we do FreeCode() below, the memory can be reused
+  // so it's possible for the same method_header to start representing
+  // different compiled code.
+  MutexLock mu(Thread::Current(), lock_);
+  ScopedCodeCacheWrite scc(code_map_.get());
+  for (const OatQuickMethodHeader* method_header : method_headers) {
+    FreeCode(method_header->GetCode());
+  }
+}
+
 void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  MutexLock mu(self, lock_);
-  // We do not check if a code cache GC is in progress, as this method comes
-  // with the classlinker_classes_lock_ held, and suspending ourselves could
-  // lead to a deadlock.
+  // We use a set to first collect all method_headers whose code needs to be
+  // removed. We need to free the underlying code after we remove CHA dependencies
+  // for entries in this set. And it's more efficient to iterate through
+  // the CHA dependency map just once with an unordered_set.
+  std::unordered_set<OatQuickMethodHeader*> method_headers;
   {
-    ScopedCodeCacheWrite scc(code_map_.get());
-    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-      if (alloc.ContainsUnsafe(it->second)) {
-        FreeCode(it->first, it->second);
-        it = method_code_map_.erase(it);
+    MutexLock mu(self, lock_);
+    // We do not check if a code cache GC is in progress, as this method comes
+    // with the classlinker_classes_lock_ held, and suspending ourselves could
+    // lead to a deadlock.
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+        if (alloc.ContainsUnsafe(it->second)) {
+          method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+          it = method_code_map_.erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+    for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->first)) {
+        // Note that the code has already been pushed to method_headers in the loop
+        // above and is going to be removed in FreeCode() below.
+        it = osr_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+    for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+      ProfilingInfo* info = *it;
+      if (alloc.ContainsUnsafe(info->GetMethod())) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+        FreeData(reinterpret_cast<uint8_t*>(info));
+        it = profiling_infos_.erase(it);
       } else {
         ++it;
       }
     }
   }
-  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
-    if (alloc.ContainsUnsafe(it->first)) {
-      // Note that the code has already been removed in the loop above.
-      it = osr_code_map_.erase(it);
-    } else {
-      ++it;
-    }
+  FreeAllMethodHeaders(method_headers);
+}
+
+bool JitCodeCache::IsWeakAccessEnabled(Thread* self) const {
+  return kUseReadBarrier
+      ? self->GetWeakRefAccessEnabled()
+      : is_weak_access_enabled_.LoadSequentiallyConsistent();
+}
+
+void JitCodeCache::WaitUntilInlineCacheAccessible(Thread* self) {
+  if (IsWeakAccessEnabled(self)) {
+    return;
   }
-  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
-    ProfilingInfo* info = *it;
-    if (alloc.ContainsUnsafe(info->GetMethod())) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
-      FreeData(reinterpret_cast<uint8_t*>(info));
-      it = profiling_infos_.erase(it);
-    } else {
-      ++it;
-    }
+  ScopedThreadSuspension sts(self, kWaitingWeakGcRootRead);
+  MutexLock mu(self, lock_);
+  while (!IsWeakAccessEnabled(self)) {
+    inline_cache_cond_.Wait(self);
   }
 }
 
-void JitCodeCache::ClearGcRootsInInlineCaches(Thread* self) {
+void JitCodeCache::BroadcastForInlineCacheAccess() {
+  Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  for (ProfilingInfo* info : profiling_infos_) {
-    if (!info->IsInUseByCompiler()) {
-      info->ClearGcRootsInInlineCaches();
+  inline_cache_cond_.Broadcast(self);
+}
+
+void JitCodeCache::AllowInlineCacheAccess() {
+  DCHECK(!kUseReadBarrier);
+  is_weak_access_enabled_.StoreSequentiallyConsistent(true);
+  BroadcastForInlineCacheAccess();
+}
+
+void JitCodeCache::DisallowInlineCacheAccess() {
+  DCHECK(!kUseReadBarrier);
+  is_weak_access_enabled_.StoreSequentiallyConsistent(false);
+}
+
+void JitCodeCache::CopyInlineCacheInto(const InlineCache& ic,
+                                       Handle<mirror::ObjectArray<mirror::Class>> array) {
+  WaitUntilInlineCacheAccessible(Thread::Current());
+  // Note that we don't need to lock `lock_` here, the compiler calling
+  // this method has already ensured the inline cache will not be deleted.
+  for (size_t in_cache = 0, in_array = 0;
+       in_cache < InlineCache::kIndividualCacheSize;
+       ++in_cache) {
+    mirror::Class* object = ic.classes_[in_cache].Read();
+    if (object != nullptr) {
+      array->Set(in_array++, object);
     }
   }
 }
 
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
-                                          const uint8_t* vmap_table,
+                                          uint8_t* stack_map,
+                                          uint8_t* roots_data,
                                           size_t frame_size_in_bytes,
                                           size_t core_spill_mask,
                                           size_t fp_spill_mask,
                                           const uint8_t* code,
                                           size_t code_size,
-                                          bool osr) {
+                                          bool osr,
+                                          Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                          bool has_should_deoptimize_flag,
+                                          const ArenaSet<ArtMethod*>&
+                                              cha_single_implementation_list) {
+  DCHECK(stack_map != nullptr);
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
   size_t total_size = header_size + code_size;
+  const uint32_t num_roots = roots->GetLength();
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
@@ -331,6 +521,9 @@
       ScopedCodeCacheWrite scc(code_map_.get());
       memory = AllocateCode(total_size);
       if (memory == nullptr) {
+        // Fill root table length so that ClearData works correctly in case of failure. Otherwise
+        // the length will be 0 and cause incorrect DCHECK failure.
+        FillRootTableLength(roots_data, num_roots);
         return nullptr;
       }
       code_ptr = memory + header_size;
@@ -338,21 +531,61 @@
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       new (method_header) OatQuickMethodHeader(
-          (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
+          code_ptr - stack_map,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
           code_size);
+      // Flush caches before we remove write permission because on some ARMv8 hardware,
+      // flushing caches requires write permissions.
+      //
+      // For reference, here are kernel patches discussing this issue:
+      // https://android.googlesource.com/kernel/msm/%2B/0e7f7bcc3fc87489cda5aa6aff8ce40eed912279
+      // https://patchwork.kernel.org/patch/9047921/
+      FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
+                            reinterpret_cast<char*>(code_ptr + code_size));
+      DCHECK(!Runtime::Current()->IsAotCompiler());
+      if (has_should_deoptimize_flag) {
+        method_header->SetHasShouldDeoptimizeFlag();
+      }
     }
 
-    FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
-                          reinterpret_cast<char*>(code_ptr + code_size));
     number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
+    // Need cha_lock_ for checking all single-implementation flags and registering
+    // dependencies.
+    MutexLock cha_mu(self, *Locks::cha_lock_);
+    bool single_impl_still_valid = true;
+    for (ArtMethod* single_impl : cha_single_implementation_list) {
+      if (!single_impl->HasSingleImplementation()) {
+        // We simply discard the compiled code. Clear the
+        // counter so that it may be recompiled later. Hopefully the
+        // class hierarchy will be more stable when compilation is retried.
+        single_impl_still_valid = false;
+        method->ClearCounter();
+        break;
+      }
+    }
+
+    // Discard the code if any single-implementation assumptions are now invalid.
+    if (!single_impl_still_valid) {
+      VLOG(jit) << "JIT discarded jitted code due to invalid single-implementation assumptions.";
+      return nullptr;
+    }
+    for (ArtMethod* single_impl : cha_single_implementation_list) {
+      Runtime::Current()->GetClassHierarchyAnalysis()->AddDependency(
+          single_impl, method, method_header);
+    }
+
+    // The following needs to be guarded by cha_lock_ also. Otherwise it's
+    // possible that the compiled code is considered invalidated by some class linking,
+    // but below we still make the compiled code valid for the method.
     MutexLock mu(self, lock_);
     method_code_map_.Put(code_ptr, method);
+    // Fill the root table before updating the entry point.
+    FillRootTable(roots_data, roots);
     if (osr) {
       number_of_osr_compilations_++;
       osr_code_map_.Put(method, code_ptr);
@@ -372,7 +605,8 @@
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
         << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
-        << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint() +
+                                         method_header->GetCodeSize());
     histogram_code_memory_use_.AddValue(code_size);
     if (code_size > kCodeSizeLogThreshold) {
       LOG(INFO) << "JIT allocated "
@@ -403,13 +637,26 @@
   return used_memory_for_data_;
 }
 
-void JitCodeCache::ClearData(Thread* self, void* data) {
-  MutexLock mu(self, lock_);
-  FreeData(reinterpret_cast<uint8_t*>(data));
+static const uint8_t* FromStackMapToRoots(const uint8_t* stack_map_data) {
+  return stack_map_data - ComputeRootTableSize(GetNumberOfRoots(stack_map_data));
 }
 
-uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size, ArtMethod* method) {
-  size = RoundUp(size, sizeof(void*));
+void JitCodeCache::ClearData(Thread* self,
+                             uint8_t* stack_map_data,
+                             uint8_t* roots_data) {
+  DCHECK_EQ(FromStackMapToRoots(stack_map_data), roots_data);
+  MutexLock mu(self, lock_);
+  FreeData(reinterpret_cast<uint8_t*>(roots_data));
+}
+
+void JitCodeCache::ReserveData(Thread* self,
+                               size_t stack_map_size,
+                               size_t number_of_roots,
+                               ArtMethod* method,
+                               uint8_t** stack_map_data,
+                               uint8_t** roots_data) {
+  size_t table_size = ComputeRootTableSize(number_of_roots);
+  size_t size = RoundUp(stack_map_size + table_size, sizeof(void*));
   uint8_t* result = nullptr;
 
   {
@@ -436,7 +683,8 @@
               << " for stack maps of "
               << ArtMethod::PrettyMethod(method);
   }
-  return result;
+  *roots_data = result;
+  *stack_map_data = result + table_size;
 }
 
 class MarkCodeVisitor FINAL : public StackVisitor {
@@ -659,20 +907,23 @@
 
 void JitCodeCache::RemoveUnmarkedCode(Thread* self) {
   ScopedTrace trace(__FUNCTION__);
-  MutexLock mu(self, lock_);
-  ScopedCodeCacheWrite scc(code_map_.get());
-  // Iterate over all compiled code and remove entries that are not marked.
-  for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-    const void* code_ptr = it->first;
-    ArtMethod* method = it->second;
-    uintptr_t allocation = FromCodeToAllocation(code_ptr);
-    if (GetLiveBitmap()->Test(allocation)) {
-      ++it;
-    } else {
-      FreeCode(code_ptr, method);
-      it = method_code_map_.erase(it);
+  std::unordered_set<OatQuickMethodHeader*> method_headers;
+  {
+    MutexLock mu(self, lock_);
+    ScopedCodeCacheWrite scc(code_map_.get());
+    // Iterate over all compiled code and remove entries that are not marked.
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      const void* code_ptr = it->first;
+      uintptr_t allocation = FromCodeToAllocation(code_ptr);
+      if (GetLiveBitmap()->Test(allocation)) {
+        ++it;
+      } else {
+        method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+        it = method_code_map_.erase(it);
+      }
     }
   }
+  FreeAllMethodHeaders(method_headers);
 }
 
 void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) {
@@ -730,8 +981,6 @@
 
   if (collect_profiling_info) {
     ScopedThreadSuspension sts(self, kSuspended);
-    gc::ScopedGCCriticalSection gcs(
-        self, gc::kGcCauseJitCodeCache, gc::kCollectorTypeJitCodeCache);
     MutexLock mu(self, lock_);
     // Free all profiling infos of methods not compiled nor being compiled.
     auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
@@ -745,10 +994,6 @@
         // code cache collection.
         if (ContainsPc(ptr) &&
             info->GetMethod()->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
-          // We clear the inline caches as classes in it might be stalled.
-          info->ClearGcRootsInInlineCaches();
-          // Do a fence to make sure the clearing is seen before attaching to the method.
-          QuasiAtomic::ThreadFenceRelease();
           info->GetMethod()->SetProfilingInfo(info);
         } else if (info->GetMethod()->GetProfilingInfo(kRuntimePointerSize) != info) {
           // No need for this ProfilingInfo object anymore.
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index e15c93a..30e2efb 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -20,6 +20,7 @@
 #include "instrumentation.h"
 
 #include "atomic.h"
+#include "base/arena_containers.h"
 #include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -36,6 +37,7 @@
 
 class ArtMethod;
 class LinearAlloc;
+class InlineCache;
 class ProfilingInfo;
 
 namespace jit {
@@ -90,15 +92,24 @@
       REQUIRES(!lock_);
 
   // Allocate and write code and its metadata to the code cache.
+  // `cha_single_implementation_list` needs to be registered via CHA (if it's
+  // still valid), since the compiled code still needs to be invalidated if the
+  // single-implementation assumptions are violated later. This needs to be done
+  // even if `has_should_deoptimize_flag` is false, which can happen due to CHA
+  // guard elimination.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
-                      const uint8_t* vmap_table,
+                      uint8_t* stack_map,
+                      uint8_t* roots_data,
                       size_t frame_size_in_bytes,
                       size_t core_spill_mask,
                       size_t fp_spill_mask,
                       const uint8_t* code,
                       size_t code_size,
-                      bool osr)
+                      bool osr,
+                      Handle<mirror::ObjectArray<mirror::Object>> roots,
+                      bool has_should_deoptimize_flag,
+                      const ArenaSet<ArtMethod*>& cha_single_implementation_list)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -108,13 +119,19 @@
   // Return true if the code cache contains this method.
   bool ContainsMethod(ArtMethod* method) REQUIRES(!lock_);
 
-  // Reserve a region of data of size at least "size". Returns null if there is no more room.
-  uint8_t* ReserveData(Thread* self, size_t size, ArtMethod* method)
+  // Allocate a region of data that contains `size` bytes, and potentially space
+  // for storing `number_of_roots` roots. Sets `*roots_data` to null if there is no more room.
+  void ReserveData(Thread* self,
+                   size_t size,
+                   size_t number_of_roots,
+                   ArtMethod* method,
+                   uint8_t** stack_map_data,
+                   uint8_t** roots_data)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
   // Clear data from the data portion of the code cache.
-  void ClearData(Thread* self, void* data)
+  void ClearData(Thread* self, uint8_t* stack_map_data, uint8_t* roots_data)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -148,7 +165,9 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void ClearGcRootsInInlineCaches(Thread* self) REQUIRES(!lock_);
+  void CopyInlineCacheInto(const InlineCache& ic, Handle<mirror::ObjectArray<mirror::Class>> array)
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Create a 'ProfileInfo' for 'method'. If 'retry_allocation' is true,
   // will collect and retry if the first allocation is unsuccessful.
@@ -188,6 +207,16 @@
 
   bool IsOsrCompiled(ArtMethod* method) REQUIRES(!lock_);
 
+  void SweepRootTables(IsMarkedVisitor* visitor)
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // The GC needs to disallow the reading of inline caches when it processes them,
+  // to avoid having a class being used while it is being deleted.
+  void AllowInlineCacheAccess() REQUIRES(!lock_);
+  void DisallowInlineCacheAccess() REQUIRES(!lock_);
+  void BroadcastForInlineCacheAccess() REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -201,13 +230,17 @@
   // allocation fails. Return null if the allocation fails.
   uint8_t* CommitCodeInternal(Thread* self,
                               ArtMethod* method,
-                              const uint8_t* vmap_table,
+                              uint8_t* stack_map,
+                              uint8_t* roots_data,
                               size_t frame_size_in_bytes,
                               size_t core_spill_mask,
                               size_t fp_spill_mask,
                               const uint8_t* code,
                               size_t code_size,
-                              bool osr)
+                              bool osr,
+                              Handle<mirror::ObjectArray<mirror::Object>> roots,
+                              bool has_should_deoptimize_flag,
+                              const ArenaSet<ArtMethod*>& cha_single_implementation_list)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -222,8 +255,13 @@
   bool WaitForPotentialCollectionToComplete(Thread* self)
       REQUIRES(lock_) REQUIRES(!Locks::mutator_lock_);
 
-  // Free in the mspace allocations taken by 'method'.
-  void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
+  // Remove CHA dependents and underlying allocations for entries in `method_headers`.
+  void FreeAllMethodHeaders(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      REQUIRES(!lock_)
+      REQUIRES(!Locks::cha_lock_);
+
+  // Free in the mspace allocations for `code_ptr`.
+  void FreeCode(const void* code_ptr) REQUIRES(lock_);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSizeLocked() REQUIRES(lock_);
@@ -261,6 +299,11 @@
   void FreeData(uint8_t* data) REQUIRES(lock_);
   uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
 
+  bool IsWeakAccessEnabled(Thread* self) const;
+  void WaitUntilInlineCacheAccessible(Thread* self)
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
@@ -333,6 +376,14 @@
   // Histograms for keeping track of profiling info statistics.
   Histogram<uint64_t> histogram_profiling_info_memory_use_ GUARDED_BY(lock_);
 
+  // Whether the GC allows accessing weaks in inline caches. Note that this
+  // is not used by the concurrent collector, which uses
+  // Thread::SetWeakRefAccessEnabled instead.
+  Atomic<bool> is_weak_access_enabled_;
+
+  // Condition to wait on for accessing inline caches.
+  ConditionVariable inline_cache_cond_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index f535151..6f2a8c6 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -37,7 +37,7 @@
 namespace art {
 
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '1', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '2', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -235,7 +235,7 @@
       AddUintToBuffer(&buffer, method_it);
     }
     for (auto class_id : dex_data.class_set) {
-      AddUintToBuffer(&buffer, class_id);
+      AddUintToBuffer(&buffer, class_id.index_);
     }
     DCHECK_EQ(required_capacity, buffer.size())
         << "Failed to add the expected number of bytes in the buffer";
@@ -282,12 +282,12 @@
 
 bool ProfileCompilationInfo::AddClassIndex(const std::string& dex_location,
                                            uint32_t checksum,
-                                           uint16_t class_idx) {
+                                           dex::TypeIndex type_idx) {
   DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
   if (data == nullptr) {
     return false;
   }
-  data->class_set.insert(class_idx);
+  data->class_set.insert(type_idx);
   return true;
 }
 
@@ -304,8 +304,8 @@
   }
 
   for (uint16_t i = 0; i < class_set_size; i++) {
-    uint16_t class_def_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
-    if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
+    uint16_t type_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
+    if (!AddClassIndex(dex_location, checksum, dex::TypeIndex(type_idx))) {
       return false;
     }
   }
@@ -569,14 +569,14 @@
   return false;
 }
 
-bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const {
+bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const {
   auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
   if (info_it != info_.end()) {
     if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
       return false;
     }
-    const std::set<uint16_t>& classes = info_it->second.class_set;
-    return classes.find(class_def_idx) != classes.end();
+    const std::set<dex::TypeIndex>& classes = info_it->second.class_set;
+    return classes.find(type_idx) != classes.end();
   }
   return false;
 }
@@ -637,7 +637,7 @@
     os << "\n\tclasses: ";
     for (const auto class_it : dex_data.class_set) {
       if (dex_file != nullptr) {
-        os << "\n\t\t" << dex_file->GetClassDescriptor(dex_file->GetClassDef(class_it));
+        os << "\n\t\t" << dex_file->PrettyType(class_it);
       } else {
         os << class_it << ",";
       }
@@ -702,11 +702,11 @@
     }
 
     for (uint16_t c = 0; c < number_of_classes; c++) {
-      uint16_t class_idx = rand() % max_classes;
+      uint16_t type_idx = rand() % max_classes;
       if (c < (number_of_classes / kFavorSplit)) {
-        class_idx %= kFavorFirstN;
+        type_idx %= kFavorFirstN;
       }
-      info.AddClassIndex(profile_key, 0, class_idx);
+      info.AddClassIndex(profile_key, 0, dex::TypeIndex(type_idx));
     }
   }
   return info.Save(fd);
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 0b26f9b..53d0eea 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -23,6 +23,7 @@
 #include "atomic.h"
 #include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "method_reference.h"
 #include "safe_map.h"
 
@@ -65,8 +66,8 @@
   // Returns true if the method reference is present in the profiling info.
   bool ContainsMethod(const MethodReference& method_ref) const;
 
-  // Returns true if the class is present in the profiling info.
-  bool ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const;
+  // Returns true if the class's type is present in the profiling info.
+  bool ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const;
 
   // Dumps all the loaded profile info into a string and returns it.
   // If dex_files is not null then the method indices will be resolved to their
@@ -104,7 +105,7 @@
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
     std::set<uint16_t> method_set;
-    std::set<uint16_t> class_set;
+    std::set<dex::TypeIndex> class_set;
 
     bool operator==(const DexFileData& other) const {
       return checksum == other.checksum && method_set == other.method_set;
@@ -115,7 +116,7 @@
 
   DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
   bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
-  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, uint16_t class_idx);
+  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, dex::TypeIndex type_idx);
   bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
 
   // Parsing functionality.
@@ -152,7 +153,7 @@
     uint8_t* Get() { return storage_.get(); }
 
    private:
-    std::unique_ptr<uint8_t> storage_;
+    std::unique_ptr<uint8_t[]> storage_;
     uint8_t* ptr_current_;
     uint8_t* ptr_end_;
   };
@@ -179,6 +180,7 @@
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
+  friend class Dex2oatLayoutTest;
 
   DexFileToProfileInfoMap info_;
 };
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 9ec46f0..405280d 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -36,15 +36,6 @@
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
     cache_[i].dex_pc_ = entries[i];
   }
-  if (method->IsCopied()) {
-    // GetHoldingClassOfCopiedMethod is expensive, but creating a profiling info for a copied method
-    // appears to happen very rarely in practice.
-    holding_class_ = GcRoot<mirror::Class>(
-        Runtime::Current()->GetClassLinker()->GetHoldingClassOfCopiedMethod(method));
-  } else {
-    holding_class_ = GcRoot<mirror::Class>(method->GetDeclaringClass());
-  }
-  DCHECK(!holding_class_.IsNull());
 }
 
 bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
@@ -116,14 +107,6 @@
         --i;
       } else {
         // We successfully set `cls`, just return.
-        // Since the instrumentation is marked from the declaring class we need to mark the card so
-        // that mod-union tables and card rescanning know about the update.
-        // Note that the declaring class is not necessarily the holding class if the method is
-        // copied. We need the card mark to be in the holding class since that is from where we
-        // will visit the profiling info.
-        if (!holding_class_.IsNull()) {
-          Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(holding_class_.Read());
-        }
         return;
       }
     }
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 1056fac..9902bb5 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -39,46 +39,13 @@
 // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
 class InlineCache {
  public:
-  bool IsMonomorphic() const {
-    DCHECK_GE(kIndividualCacheSize, 2);
-    return !classes_[0].IsNull() && classes_[1].IsNull();
-  }
-
-  bool IsMegamorphic() const {
-    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-      if (classes_[i].IsNull()) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  mirror::Class* GetMonomorphicType() const REQUIRES_SHARED(Locks::mutator_lock_) {
-    // Note that we cannot ensure the inline cache is actually monomorphic
-    // at this point, as other threads may have updated it.
-    DCHECK(!classes_[0].IsNull());
-    return classes_[0].Read();
-  }
-
-  bool IsUninitialized() const {
-    return classes_[0].IsNull();
-  }
-
-  bool IsPolymorphic() const {
-    DCHECK_GE(kIndividualCacheSize, 3);
-    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-  }
-
-  mirror::Class* GetTypeAt(size_t i) const REQUIRES_SHARED(Locks::mutator_lock_) {
-    return classes_[i].Read();
-  }
-
   static constexpr uint16_t kIndividualCacheSize = 5;
 
  private:
   uint32_t dex_pc_;
   GcRoot<mirror::Class> classes_[kIndividualCacheSize];
 
+  friend class jit::JitCodeCache;
   friend class ProfilingInfo;
 
   DISALLOW_COPY_AND_ASSIGN(InlineCache);
@@ -102,18 +69,6 @@
       REQUIRES(Roles::uninterruptible_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
-  template<typename RootVisitorType>
-  void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
-    visitor.VisitRootIfNonNull(holding_class_.AddressWithoutBarrier());
-    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      InlineCache* cache = &cache_[i];
-      for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
-        visitor.VisitRootIfNonNull(cache->classes_[j].AddressWithoutBarrier());
-      }
-    }
-  }
-
   ArtMethod* GetMethod() const {
     return method_;
   }
@@ -175,9 +130,6 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
-  // Holding class for the method in case method is a copied method.
-  GcRoot<mirror::Class> holding_class_;
-
   // Whether the ArtMethod is currently being compiled. This flag
   // is implicitly guarded by the JIT code cache lock.
   // TODO: Make the JIT code cache lock global.
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 0217a67..01a2ad8 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -157,14 +157,14 @@
     ThrowNoSuchMethodError(soa, c, name, sig, is_static ? "static" : "non-static");
     return nullptr;
   }
-  return soa.EncodeMethod(method);
+  return jni::EncodeArtMethod(method);
 }
 
 static ObjPtr<mirror::ClassLoader> GetClassLoader(const ScopedObjectAccess& soa)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtMethod* method = soa.Self()->GetCurrentMethod(nullptr);
   // If we are running Runtime.nativeLoad, use the overriding ClassLoader it set.
-  if (method == soa.DecodeMethod(WellKnownClasses::java_lang_Runtime_nativeLoad)) {
+  if (method == jni::DecodeArtMethod(WellKnownClasses::java_lang_Runtime_nativeLoad)) {
     return soa.Decode<mirror::ClassLoader>(soa.Self()->GetClassLoaderOverride());
   }
   // If we have a method, use its ClassLoader for context.
@@ -235,7 +235,7 @@
                                    sig, name, c->GetDescriptor(&temp));
     return nullptr;
   }
-  return soa.EncodeField(field);
+  return jni::EncodeArtField(field);
 }
 
 static void ThrowAIOOBE(ScopedObjectAccess& soa, mirror::Array* array, jsize start,
@@ -368,7 +368,7 @@
   static jmethodID FromReflectedMethod(JNIEnv* env, jobject jlr_method) {
     CHECK_NON_NULL_ARGUMENT(jlr_method);
     ScopedObjectAccess soa(env);
-    return soa.EncodeMethod(ArtMethod::FromReflectedMethod(soa, jlr_method));
+    return jni::EncodeArtMethod(ArtMethod::FromReflectedMethod(soa, jlr_method));
   }
 
   static jfieldID FromReflectedField(JNIEnv* env, jobject jlr_field) {
@@ -380,13 +380,13 @@
       return nullptr;
     }
     ObjPtr<mirror::Field> field = ObjPtr<mirror::Field>::DownCast(obj_field);
-    return soa.EncodeField(field->GetArtField());
+    return jni::EncodeArtField(field->GetArtField());
   }
 
   static jobject ToReflectedMethod(JNIEnv* env, jclass, jmethodID mid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(mid);
     ScopedObjectAccess soa(env);
-    ArtMethod* m = soa.DecodeMethod(mid);
+    ArtMethod* m = jni::DecodeArtMethod(mid);
     mirror::Executable* method;
     DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
     DCHECK(!Runtime::Current()->IsActiveTransaction());
@@ -401,7 +401,7 @@
   static jobject ToReflectedField(JNIEnv* env, jclass, jfieldID fid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(
         mirror::Field::CreateFromArtField<kRuntimePointerSize>(soa.Self(), f, true));
   }
@@ -631,8 +631,8 @@
     }
     if (c->IsStringClass()) {
       // Replace calls to String.<init> with equivalent StringFactory call.
-      jmethodID sf_mid = soa.EncodeMethod(
-          WellKnownClasses::StringInitToStringFactory(soa.DecodeMethod(mid)));
+      jmethodID sf_mid = jni::EncodeArtMethod(
+          WellKnownClasses::StringInitToStringFactory(jni::DecodeArtMethod(mid)));
       return CallStaticObjectMethodV(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
     }
     ObjPtr<mirror::Object> result = c->AllocObject(soa.Self());
@@ -658,8 +658,8 @@
     }
     if (c->IsStringClass()) {
       // Replace calls to String.<init> with equivalent StringFactory call.
-      jmethodID sf_mid = soa.EncodeMethod(
-          WellKnownClasses::StringInitToStringFactory(soa.DecodeMethod(mid)));
+      jmethodID sf_mid = jni::EncodeArtMethod(
+          WellKnownClasses::StringInitToStringFactory(jni::DecodeArtMethod(mid)));
       return CallStaticObjectMethodA(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
     }
     ObjPtr<mirror::Object> result = c->AllocObject(soa.Self());
@@ -1237,14 +1237,14 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(obj);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(o));
   }
 
   static jobject GetStaticObjectField(JNIEnv* env, jclass, jfieldID fid) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(f->GetDeclaringClass()));
   }
 
@@ -1254,7 +1254,7 @@
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(java_object);
     ObjPtr<mirror::Object> v = soa.Decode<mirror::Object>(java_value);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     f->SetObject<false>(o, v);
   }
 
@@ -1262,7 +1262,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid);
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> v = soa.Decode<mirror::Object>(java_value);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     f->SetObject<false>(f->GetDeclaringClass(), v);
   }
 
@@ -1271,13 +1271,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
   ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(instance); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   return f->Get ##fn (o)
 
 #define GET_STATIC_PRIMITIVE_FIELD(fn) \
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   return f->Get ##fn (f->GetDeclaringClass())
 
 #define SET_PRIMITIVE_FIELD(fn, instance, value) \
@@ -1285,13 +1285,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
   ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(instance); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   f->Set ##fn <false>(o, value)
 
 #define SET_STATIC_PRIMITIVE_FIELD(fn, value) \
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   f->Set ##fn <false>(f->GetDeclaringClass(), value)
 
   static jboolean GetBooleanField(JNIEnv* env, jobject obj, jfieldID fid) {
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index b829934..b3837c4 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -20,6 +20,8 @@
 #include <jni.h>
 #include <iosfwd>
 
+#include "base/macros.h"
+
 #ifndef NATIVE_METHOD
 #define NATIVE_METHOD(className, functionName, signature) \
   { #functionName, signature, reinterpret_cast<void*>(className ## _ ## functionName) }
@@ -36,6 +38,9 @@
 
 namespace art {
 
+class ArtField;
+class ArtMethod;
+
 const JNINativeInterface* GetJniNativeInterface();
 const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
@@ -46,6 +51,29 @@
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
 
+namespace jni {
+
+ALWAYS_INLINE
+static inline ArtField* DecodeArtField(jfieldID fid) {
+  return reinterpret_cast<ArtField*>(fid);
+}
+
+ALWAYS_INLINE
+static inline jfieldID EncodeArtField(ArtField* field) {
+  return reinterpret_cast<jfieldID>(field);
+}
+
+ALWAYS_INLINE
+static inline jmethodID EncodeArtMethod(ArtMethod* art_method) {
+  return reinterpret_cast<jmethodID>(art_method);
+}
+
+ALWAYS_INLINE
+static inline ArtMethod* DecodeArtMethod(jmethodID method_id) {
+  return reinterpret_cast<ArtMethod*>(method_id);
+}
+
+}  // namespace jni
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs);
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index e990935..a421c34 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -679,12 +679,8 @@
   ASSERT_TRUE(env_->IsInstanceOf(o, c));
   // ...whose fields haven't been initialized because
   // we didn't call a constructor.
-  if (art::mirror::kUseStringCompression) {
-    // Zero-length string is compressed, so the length internally will be -(1 << 31).
-    ASSERT_EQ(-2147483648, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
-  } else {
-    ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
-  }
+  // Even with string compression empty string has `count == 0`.
+  ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
 }
 
 TEST_F(JniInternalTest, GetVersion) {
@@ -895,11 +891,12 @@
   // Make sure we can actually use it.
   jstring s = env_->NewStringUTF("poop");
   if (mirror::kUseStringCompression) {
-    // Negative because s is compressed (first bit is 1)
-    ASSERT_EQ(-2147483644, env_->GetIntField(s, fid2));
+    ASSERT_EQ(mirror::String::GetFlaggedCount(4, /* compressible */ true),
+              env_->GetIntField(s, fid2));
     // Create incompressible string
     jstring s_16 = env_->NewStringUTF("\u0444\u0444");
-    ASSERT_EQ(2, env_->GetIntField(s_16, fid2));
+    ASSERT_EQ(mirror::String::GetFlaggedCount(2, /* compressible */ false),
+              env_->GetIntField(s_16, fid2));
   } else {
     ASSERT_EQ(4, env_->GetIntField(s, fid2));
   }
diff --git a/runtime/jvalue.h b/runtime/jvalue.h
index 52a0f23..398bfbc 100644
--- a/runtime/jvalue.h
+++ b/runtime/jvalue.h
@@ -29,7 +29,7 @@
 class Object;
 }  // namespace mirror
 
-union PACKED(4) JValue {
+union PACKED(alignof(mirror::Object*)) JValue {
   // We default initialize JValue instances to all-zeros.
   JValue() : j(0) {}
 
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 538b6eb..2f2565b 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -61,7 +61,7 @@
  */
 class LockWord {
  public:
-  enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
+  enum SizeShiftsAndMasks : uint32_t {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
     kReadBarrierStateSize = 1,
@@ -91,6 +91,8 @@
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+    kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
+    kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,
 
     // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
@@ -140,7 +142,7 @@
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | kStateForwardingAddressShifted);
   }
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
@@ -202,6 +204,8 @@
 
   void SetReadBarrierState(uint32_t rb_state) {
     DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    DCHECK(rb_state == ReadBarrier::WhiteState() ||
+           rb_state == ReadBarrier::GrayState()) << rb_state;
     DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
     // Clear and or the bits.
     value_ &= ~(kReadBarrierStateMask << kReadBarrierStateShift);
@@ -256,6 +260,14 @@
   LockWord();
 
   explicit LockWord(uint32_t val) : value_(val) {
+    // Make sure adding the overflow causes an overflow.
+    constexpr uint64_t overflow = static_cast<uint64_t>(kStateForwardingAddressShifted) +
+        static_cast<uint64_t>(kStateForwardingAddressOverflow);
+    constexpr bool is_larger = overflow > static_cast<uint64_t>(0xFFFFFFFF);
+    static_assert(is_larger, "should have overflowed");
+    static_assert(
+         (~kStateForwardingAddress & kStateMask) == 0,
+        "READ_BARRIER_MARK_REG relies on the forwarding address state being only one bits");
     CheckReadBarrierState();
   }
 
@@ -270,9 +282,8 @@
       if (!kUseReadBarrier) {
         DCHECK_EQ(rb_state, 0U);
       } else {
-        DCHECK(rb_state == ReadBarrier::white_ptr_ ||
-               rb_state == ReadBarrier::gray_ptr_ ||
-               rb_state == ReadBarrier::black_ptr_) << rb_state;
+        DCHECK(rb_state == ReadBarrier::WhiteState() ||
+               rb_state == ReadBarrier::GrayState()) << rb_state;
       }
     }
   }
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 1ec59b3..6da72e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -282,6 +282,7 @@
 #ifndef __LP64__
   UNUSED(low_4gb);
 #endif
+  use_ashmem = use_ashmem && !kIsTargetLinux;
   if (byte_count == 0) {
     return new MemMap(name, nullptr, 0, nullptr, 0, prot, false);
   }
@@ -522,6 +523,7 @@
 
 MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
                            std::string* error_msg, bool use_ashmem) {
+  use_ashmem = use_ashmem && !kIsTargetLinux;
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 049ae12..0fea1a5 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -68,7 +68,7 @@
                               bool low_4gb,
                               bool reuse,
                               std::string* error_msg,
-                              bool use_ashmem = !kIsTargetLinux);
+                              bool use_ashmem = true);
 
   // Create placeholder for a region allocated by direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -172,7 +172,7 @@
                      const char* tail_name,
                      int tail_prot,
                      std::string* error_msg,
-                     bool use_ashmem = !kIsTargetLinux);
+                     bool use_ashmem = true);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!Locks::mem_maps_lock_);
diff --git a/runtime/method_handles-inl.h b/runtime/method_handles-inl.h
index ff5d2a1..1240792 100644
--- a/runtime/method_handles-inl.h
+++ b/runtime/method_handles-inl.h
@@ -31,122 +31,70 @@
 
 namespace art {
 
-// Assigns |type| to the primitive type associated with |dst_class|. Returns
-// true iff. |dst_class| was a boxed type (Integer, Long etc.), false otherwise.
-REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool GetPrimitiveType(ObjPtr<mirror::Class> dst_class, Primitive::Type* type) {
-  if (dst_class->DescriptorEquals("Ljava/lang/Boolean;")) {
-    (*type) = Primitive::kPrimBoolean;
+inline bool ConvertArgumentValue(Handle<mirror::MethodType> callsite_type,
+                                 Handle<mirror::MethodType> callee_type,
+                                 int index,
+                                 JValue* value) REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> from_class(callsite_type->GetPTypes()->GetWithoutChecks(index));
+  ObjPtr<mirror::Class> to_class(callee_type->GetPTypes()->GetWithoutChecks(index));
+  if (from_class == to_class) {
     return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Byte;")) {
-    (*type) = Primitive::kPrimByte;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Character;")) {
-    (*type) = Primitive::kPrimChar;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Float;")) {
-    (*type) = Primitive::kPrimFloat;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Double;")) {
-    (*type) = Primitive::kPrimDouble;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Integer;")) {
-    (*type) = Primitive::kPrimInt;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Long;")) {
-    (*type) = Primitive::kPrimLong;
-    return true;
-  } else if (dst_class->DescriptorEquals("Ljava/lang/Short;")) {
-    (*type) = Primitive::kPrimShort;
+  }
+
+  // |value| may contain a bare heap pointer which is generally
+  // unsafe. ConvertJValueCommon() saves |value|, |from_class|, and
+  // |to_class| to Handles where necessary to avoid issues if the heap
+  // changes.
+  if (ConvertJValueCommon(callsite_type, callee_type, from_class, to_class, value)) {
+    DCHECK(!Thread::Current()->IsExceptionPending());
     return true;
   } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
+    value->SetJ(0);
     return false;
   }
 }
 
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline bool ConvertJValue(Handle<mirror::Class> from,
-                          Handle<mirror::Class> to,
-                          const JValue& from_value,
-                          JValue* to_value) {
-  const Primitive::Type from_type = from->GetPrimitiveType();
-  const Primitive::Type to_type = to->GetPrimitiveType();
-
-  // This method must be called only when the types don't match.
-  DCHECK(from.Get() != to.Get());
-
-  if ((from_type != Primitive::kPrimNot) && (to_type != Primitive::kPrimNot)) {
-    // Throws a ClassCastException if we're unable to convert a primitive value.
-    return ConvertPrimitiveValue(false, from_type, to_type, from_value, to_value);
-  } else if ((from_type == Primitive::kPrimNot) && (to_type == Primitive::kPrimNot)) {
-    // They're both reference types. If "from" is null, we can pass it
-    // through unchanged. If not, we must generate a cast exception if
-    // |to| is not assignable from the dynamic type of |ref|.
-    mirror::Object* const ref = from_value.GetL();
-    if (ref == nullptr || to->IsAssignableFrom(ref->GetClass())) {
-      to_value->SetL(ref);
-      return true;
-    } else {
-      ThrowClassCastException(to.Get(), ref->GetClass());
-      return false;
-    }
-  } else {
-    // Precisely one of the source or the destination are reference types.
-    // We must box or unbox.
-    if (to_type == Primitive::kPrimNot) {
-      // The target type is a reference, we must box.
-      Primitive::Type type;
-      // TODO(narayan): This is a CHECK for now. There might be a few corner cases
-      // here that we might not have handled yet. For exmple, if |to| is java/lang/Number;,
-      // we will need to box this "naturally".
-      CHECK(GetPrimitiveType(to.Get(), &type));
-      // First perform a primitive conversion to the unboxed equivalent of the target,
-      // if necessary. This should be for the rarer cases like (int->Long) etc.
-      if (UNLIKELY(from_type != type)) {
-         if (!ConvertPrimitiveValue(false, from_type, type, from_value, to_value)) {
-           return false;
-         }
-      } else {
-        *to_value = from_value;
-      }
-
-      // Then perform the actual boxing, and then set the reference.
-      ObjPtr<mirror::Object> boxed = BoxPrimitive(type, from_value);
-      to_value->SetL(boxed.Ptr());
-      return true;
-    } else {
-      // The target type is a primitive, we must unbox.
-      ObjPtr<mirror::Object> ref(from_value.GetL());
-
-      // Note that UnboxPrimitiveForResult already performs all of the type
-      // conversions that we want, based on |to|.
-      JValue unboxed_value;
-      return UnboxPrimitiveForResult(ref, to.Get(), to_value);
-    }
+inline bool ConvertReturnValue(Handle<mirror::MethodType> callsite_type,
+                               Handle<mirror::MethodType> callee_type,
+                               JValue* value)  REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> from_class(callee_type->GetRType());
+  ObjPtr<mirror::Class> to_class(callsite_type->GetRType());
+  if (to_class->GetPrimitiveType() == Primitive::kPrimVoid || from_class == to_class) {
+    return true;
   }
 
-  return true;
+  // |value| may contain a bare heap pointer which is generally
+  // unsafe. ConvertJValueCommon() saves |value|, |from_class|, and
+  // |to_class| to Handles where necessary to avoid issues if the heap
+  // changes.
+  if (ConvertJValueCommon(callsite_type, callee_type, from_class, to_class, value)) {
+    DCHECK(!Thread::Current()->IsExceptionPending());
+    return true;
+  } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
+    value->SetJ(0);
+    return false;
+  }
 }
 
 template <typename G, typename S>
 bool PerformConversions(Thread* self,
-                        Handle<mirror::ObjectArray<mirror::Class>> from_types,
-                        Handle<mirror::ObjectArray<mirror::Class>> to_types,
+                        Handle<mirror::MethodType> callsite_type,
+                        Handle<mirror::MethodType> callee_type,
                         G* getter,
                         S* setter,
-                        int32_t num_conversions) {
+                        int32_t num_conversions) REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<2> hs(self);
-  MutableHandle<mirror::Class> from(hs.NewHandle<mirror::Class>(nullptr));
-  MutableHandle<mirror::Class> to(hs.NewHandle<mirror::Class>(nullptr));
+  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(callsite_type->GetPTypes()));
+  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
 
   for (int32_t i = 0; i < num_conversions; ++i) {
-    from.Assign(from_types->GetWithoutChecks(i));
-    to.Assign(to_types->GetWithoutChecks(i));
-
-    const Primitive::Type from_type = from->GetPrimitiveType();
-    const Primitive::Type to_type = to->GetPrimitiveType();
-
-    if (from.Get() == to.Get()) {
+    ObjPtr<mirror::Class> from(from_types->GetWithoutChecks(i));
+    ObjPtr<mirror::Class> to(to_types->GetWithoutChecks(i));
+    const Primitive::Type from_type = from_types->GetWithoutChecks(i)->GetPrimitiveType();
+    const Primitive::Type to_type = to_types->GetWithoutChecks(i)->GetPrimitiveType();
+    if (from == to) {
       // Easy case - the types are identical. Nothing left to do except to pass
       // the arguments along verbatim.
       if (Primitive::Is64BitType(from_type)) {
@@ -156,31 +104,29 @@
       } else {
         setter->Set(getter->Get());
       }
-
-      continue;
     } else {
-      JValue from_value;
-      JValue to_value;
+      JValue value;
 
       if (Primitive::Is64BitType(from_type)) {
-        from_value.SetJ(getter->GetLong());
+        value.SetJ(getter->GetLong());
       } else if (from_type == Primitive::kPrimNot) {
-        from_value.SetL(getter->GetReference());
+        value.SetL(getter->GetReference());
       } else {
-        from_value.SetI(getter->Get());
+        value.SetI(getter->Get());
       }
 
-      if (!ConvertJValue(from, to, from_value, &to_value)) {
+      // Caveat emptor - ObjPtr's not guaranteed valid after this call.
+      if (!ConvertArgumentValue(callsite_type, callee_type, i, &value)) {
         DCHECK(self->IsExceptionPending());
         return false;
       }
 
       if (Primitive::Is64BitType(to_type)) {
-        setter->SetLong(to_value.GetJ());
+        setter->SetLong(value.GetJ());
       } else if (to_type == Primitive::kPrimNot) {
-        setter->SetReference(to_value.GetL());
+        setter->SetReference(value.GetL());
       } else {
-        setter->Set(to_value.GetI());
+        setter->Set(value.GetI());
       }
     }
   }
@@ -196,10 +142,10 @@
                                             uint32_t first_src_reg,
                                             uint32_t first_dest_reg,
                                             const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                            ShadowFrame* callee_frame) {
-  StackHandleScope<4> hs(self);
-  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(callsite_type->GetPTypes()));
-  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
+                                            ShadowFrame* callee_frame)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::ObjectArray<mirror::Class>> from_types(callsite_type->GetPTypes());
+  ObjPtr<mirror::ObjectArray<mirror::Class>> to_types(callee_type->GetPTypes());
 
   const int32_t num_method_params = from_types->GetLength();
   if (to_types->GetLength() != num_method_params) {
@@ -211,8 +157,8 @@
   ShadowFrameSetter setter(callee_frame, first_dest_reg);
 
   return PerformConversions<ShadowFrameGetter<is_range>, ShadowFrameSetter>(self,
-                                                                            from_types,
-                                                                            to_types,
+                                                                            callsite_type,
+                                                                            callee_type,
                                                                             &getter,
                                                                             &setter,
                                                                             num_method_params);
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
new file mode 100644
index 0000000..3c22d7f
--- /dev/null
+++ b/runtime/method_handles.cc
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "method_handles.h"
+
+#include "method_handles-inl.h"
+#include "jvalue.h"
+#include "jvalue-inl.h"
+#include "reflection.h"
+#include "reflection-inl.h"
+#include "well_known_classes.h"
+
+namespace art {
+
+namespace {
+
+#define PRIMITIVES_LIST(V) \
+  V(Primitive::kPrimBoolean, Boolean, Boolean, Z) \
+  V(Primitive::kPrimByte, Byte, Byte, B)          \
+  V(Primitive::kPrimChar, Char, Character, C)     \
+  V(Primitive::kPrimShort, Short, Short, S)       \
+  V(Primitive::kPrimInt, Int, Integer, I)         \
+  V(Primitive::kPrimLong, Long, Long, J)          \
+  V(Primitive::kPrimFloat, Float, Float, F)       \
+  V(Primitive::kPrimDouble, Double, Double, D)
+
+// Assigns |type| to the primitive type associated with |klass|. Returns
+// true iff. |klass| was a boxed type (Integer, Long etc.), false otherwise.
+bool GetUnboxedPrimitiveType(ObjPtr<mirror::Class> klass, Primitive::Type* type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+#define LOOKUP_PRIMITIVE(primitive, _, __, ___)                         \
+  if (klass->DescriptorEquals(Primitive::BoxedDescriptor(primitive))) { \
+    *type = primitive;                                                  \
+    return true;                                                        \
+  }
+
+  PRIMITIVES_LIST(LOOKUP_PRIMITIVE);
+#undef LOOKUP_PRIMITIVE
+  return false;
+}
+
+ObjPtr<mirror::Class> GetBoxedPrimitiveClass(Primitive::Type type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  jmethodID m = nullptr;
+  switch (type) {
+#define CASE_PRIMITIVE(primitive, _, java_name, __)              \
+    case primitive:                                              \
+      m = WellKnownClasses::java_lang_ ## java_name ## _valueOf; \
+      break;
+    PRIMITIVES_LIST(CASE_PRIMITIVE);
+#undef CASE_PRIMITIVE
+    case Primitive::Type::kPrimNot:
+    case Primitive::Type::kPrimVoid:
+      return nullptr;
+  }
+  return jni::DecodeArtMethod(m)->GetDeclaringClass();
+}
+
+bool GetUnboxedTypeAndValue(ObjPtr<mirror::Object> o, Primitive::Type* type, JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  ObjPtr<mirror::Class> klass = o->GetClass();
+  ArtField* primitive_field = &klass->GetIFieldsPtr()->At(0);
+#define CASE_PRIMITIVE(primitive, abbrev, _, shorthand)         \
+  if (klass == GetBoxedPrimitiveClass(primitive)) {             \
+    *type = primitive;                                          \
+    value->Set ## shorthand(primitive_field->Get ## abbrev(o)); \
+    return true;                                                \
+  }
+  PRIMITIVES_LIST(CASE_PRIMITIVE)
+#undef CASE_PRIMITIVE
+  return false;
+}
+
+inline bool IsReferenceType(Primitive::Type type) {
+  return type == Primitive::kPrimNot;
+}
+
+inline bool IsPrimitiveType(Primitive::Type type) {
+  return !IsReferenceType(type);
+}
+
+}  // namespace
+
+bool IsParameterTypeConvertible(ObjPtr<mirror::Class> from, ObjPtr<mirror::Class> to)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // This function returns true if there's any conceivable conversion
+  // between |from| and |to|. It's expected this method will be used
+  // to determine if a WrongMethodTypeException should be raised. The
+  // decision logic follows the documentation for MethodType.asType().
+  if (from == to) {
+    return true;
+  }
+
+  Primitive::Type from_primitive = from->GetPrimitiveType();
+  Primitive::Type to_primitive = to->GetPrimitiveType();
+  DCHECK(from_primitive != Primitive::Type::kPrimVoid);
+  DCHECK(to_primitive != Primitive::Type::kPrimVoid);
+
+  // If |to| and |from| are references.
+  if (IsReferenceType(from_primitive) && IsReferenceType(to_primitive)) {
+    // Assignability is determined during parameter conversion when
+    // invoking the associated method handle.
+    return true;
+  }
+
+  // If |to| and |from| are primitives and a widening conversion exists.
+  if (Primitive::IsWidenable(from_primitive, to_primitive)) {
+    return true;
+  }
+
+  // If |to| is a reference and |from| is a primitive, then boxing conversion.
+  if (IsReferenceType(to_primitive) && IsPrimitiveType(from_primitive)) {
+    return to->IsAssignableFrom(GetBoxedPrimitiveClass(from_primitive));
+  }
+
+  // If |from| is a reference and |to| is a primitive, then unboxing conversion.
+  if (IsPrimitiveType(to_primitive) && IsReferenceType(from_primitive)) {
+    if (from->DescriptorEquals("Ljava/lang/Object;")) {
+      // Object might be converted into a primitive during unboxing.
+      return true;
+    } else if (Primitive::IsNumericType(to_primitive) &&
+               from->DescriptorEquals("Ljava/lang/Number;")) {
+      // Number might be unboxed into any of the number primitive types.
+      return true;
+    }
+    Primitive::Type unboxed_type;
+    if (GetUnboxedPrimitiveType(from, &unboxed_type)) {
+      if (unboxed_type == to_primitive) {
+        // Straightforward unboxing conversion such as Boolean => boolean.
+        return true;
+      } else {
+        // Check if widening operations for numeric primitives would work,
+        // such as Byte => byte => long.
+        return Primitive::IsWidenable(unboxed_type, to_primitive);
+      }
+    }
+  }
+
+  return false;
+}
+
+bool IsReturnTypeConvertible(ObjPtr<mirror::Class> from, ObjPtr<mirror::Class> to)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (to->GetPrimitiveType() == Primitive::Type::kPrimVoid) {
+    // Result will be ignored.
+    return true;
+  } else if (from->GetPrimitiveType() == Primitive::Type::kPrimVoid) {
+    // Returned value will be 0 / null.
+    return true;
+  } else {
+    // Otherwise apply usual parameter conversion rules.
+    return IsParameterTypeConvertible(from, to);
+  }
+}
+
+bool ConvertJValueCommon(
+    Handle<mirror::MethodType> callsite_type,
+    Handle<mirror::MethodType> callee_type,
+    ObjPtr<mirror::Class> from,
+    ObjPtr<mirror::Class> to,
+    JValue* value) {
+  // The reader may be concerned about the safety of the heap object
+  // that may be in |value|. There is only one case where allocation
+  // is obviously needed and that's for boxing. However, in the case
+  // of boxing |value| contains a non-reference type.
+
+  const Primitive::Type from_type = from->GetPrimitiveType();
+  const Primitive::Type to_type = to->GetPrimitiveType();
+
+  // Put incoming value into |src_value| and set return value to 0.
+  // Errors and conversions from void require the return value to be 0.
+  const JValue src_value(*value);
+  value->SetJ(0);
+
+  // Conversion from void sets the result to zero.
+  if (from_type == Primitive::kPrimVoid) {
+    return true;
+  }
+
+  // This method must be called only when the types don't match.
+  DCHECK(from != to);
+
+  if (IsPrimitiveType(from_type) && IsPrimitiveType(to_type)) {
+    // The source and target types are both primitives.
+    if (UNLIKELY(!ConvertPrimitiveValueNoThrow(from_type, to_type, src_value, value))) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+    return true;
+  } else if (IsReferenceType(from_type) && IsReferenceType(to_type)) {
+    // They're both reference types. If "from" is null, we can pass it
+    // through unchanged. If not, we must generate a cast exception if
+    // |to| is not assignable from the dynamic type of |ref|.
+    //
+    // Playing it safe with StackHandleScope here, not expecting any allocation
+    // in mirror::Class::IsAssignableFrom().
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::Class> h_to(hs.NewHandle(to));
+    Handle<mirror::Object> h_obj(hs.NewHandle(src_value.GetL()));
+    if (h_obj.Get() != nullptr && !to->IsAssignableFrom(h_obj->GetClass())) {
+      ThrowClassCastException(h_to.Get(), h_obj->GetClass());
+      return false;
+    }
+    value->SetL(h_obj.Get());
+    return true;
+  } else if (IsReferenceType(to_type)) {
+    DCHECK(IsPrimitiveType(from_type));
+    // The source type is a primitive and the target type is a reference, so we must box.
+    // The target type may be a superclass of the boxed source type, for example,
+    // if the source type is int, its boxed type is java.lang.Integer, and the target
+    // type could be java.lang.Number.
+    Primitive::Type type;
+    if (!GetUnboxedPrimitiveType(to, &type)) {
+      ObjPtr<mirror::Class> boxed_from_class = GetBoxedPrimitiveClass(from_type);
+      if (boxed_from_class->IsSubClass(to)) {
+        type = from_type;
+      } else {
+        ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+        return false;
+      }
+    }
+
+    if (UNLIKELY(from_type != type)) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    if (!ConvertPrimitiveValueNoThrow(from_type, type, src_value, value)) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    // Then perform the actual boxing, and then set the reference.
+    ObjPtr<mirror::Object> boxed = BoxPrimitive(type, src_value);
+    value->SetL(boxed.Ptr());
+    return true;
+  } else {
+    // The source type is a reference and the target type is a primitive, so we must unbox.
+    DCHECK(IsReferenceType(from_type));
+    DCHECK(IsPrimitiveType(to_type));
+
+    ObjPtr<mirror::Object> from_obj(src_value.GetL());
+    if (UNLIKELY(from_obj == nullptr)) {
+      ThrowNullPointerException(
+          StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
+                       from->PrettyDescriptor().c_str()).c_str());
+      return false;
+    }
+
+    Primitive::Type unboxed_type;
+    JValue unboxed_value;
+    if (UNLIKELY(!GetUnboxedTypeAndValue(from_obj, &unboxed_type, &unboxed_value))) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    if (UNLIKELY(!ConvertPrimitiveValueNoThrow(unboxed_type, to_type, unboxed_value, value))) {
+      ThrowClassCastException(from, to);
+      return false;
+    }
+
+    return true;
+  }
+}
+
+}  // namespace art
diff --git a/runtime/method_handles.h b/runtime/method_handles.h
index 0d3f9f1..d0a4902 100644
--- a/runtime/method_handles.h
+++ b/runtime/method_handles.h
@@ -20,7 +20,10 @@
 #include <ostream>
 
 #include "dex_instruction.h"
+#include "handle.h"
 #include "jvalue.h"
+#include "mirror/class.h"
+#include "mirror/method_type.h"
 
 namespace art {
 
@@ -43,12 +46,13 @@
   kInvokeStatic,
   kInvokeInterface,
   kInvokeTransform,
+  kInvokeCallSiteTransform,
   kInstanceGet,
   kInstancePut,
   kStaticGet,
   kStaticPut,
   kLastValidKind = kStaticPut,
-  kLastInvokeKind = kInvokeTransform
+  kLastInvokeKind = kInvokeCallSiteTransform
 };
 
 // Whether the given method handle kind is some variant of an invoke.
@@ -56,13 +60,51 @@
   return handle_kind <= kLastInvokeKind;
 }
 
-// Performs a single argument conversion from type |from| to a distinct
-// type |to|. Returns true on success, false otherwise.
-REQUIRES_SHARED(Locks::mutator_lock_)
-inline bool ConvertJValue(Handle<mirror::Class> from,
-                          Handle<mirror::Class> to,
-                          const JValue& from_value,
-                          JValue* to_value) ALWAYS_INLINE;
+// Whether the given method handle kind is some variant of a transform.
+inline bool IsInvokeTransform(const MethodHandleKind handle_kind) {
+  return handle_kind == kInvokeTransform || handle_kind == kInvokeCallSiteTransform;
+}
+
+// Returns true if there is a possible conversion from |from| to |to|
+// for a MethodHandle parameter.
+bool IsParameterTypeConvertible(ObjPtr<mirror::Class> from,
+                                ObjPtr<mirror::Class> to);
+
+// Returns true if there is a possible conversion from |from| to |to|
+// for the return type of a MethodHandle.
+bool IsReturnTypeConvertible(ObjPtr<mirror::Class> from,
+                             ObjPtr<mirror::Class> to);
+
+// Performs a conversion from type |from| to a distinct type |to| as
+// part of conversion of |callsite_type| to |callee_type|. The value to
+// be converted is in |value|. Returns true on success and updates
+// |value| with the converted value, false otherwise.
+bool ConvertJValueCommon(Handle<mirror::MethodType> callsite_type,
+                         Handle<mirror::MethodType> callee_type,
+                         ObjPtr<mirror::Class> from,
+                         ObjPtr<mirror::Class> to,
+                         JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Converts the value of the argument at position |index| from the type
+// used by |callsite_type| to the type expected by |callee_type|. |value|
+// represents the value to be converted. Returns true on success and
+// updates |value|, false otherwise.
+ALWAYS_INLINE bool ConvertArgumentValue(Handle<mirror::MethodType> callsite_type,
+                                        Handle<mirror::MethodType> callee_type,
+                                        int index,
+                                        JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Converts the return value from return type yielded by
+// |callee_type| to the return type yielded by
+// |callsite_type|. |value| represents the value to be
+// converted. Returns true on success and updates |value|, false
+// otherwise.
+ALWAYS_INLINE bool ConvertReturnValue(Handle<mirror::MethodType> callsite_type,
+                                      Handle<mirror::MethodType> callee_type,
+                                      JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
 // Perform argument conversions between |callsite_type| (the type of the
 // incoming arguments) and |callee_type| (the type of the method being
@@ -109,17 +151,16 @@
 // up too much space, we can make G / S abstract base classes that are
 // overridden by concrete classes.
 template <typename G, typename S>
-REQUIRES_SHARED(Locks::mutator_lock_)
 bool PerformConversions(Thread* self,
-                        Handle<mirror::ObjectArray<mirror::Class>> from_types,
-                        Handle<mirror::ObjectArray<mirror::Class>> to_types,
+                        Handle<mirror::MethodType> callsite_type,
+                        Handle<mirror::MethodType> callee_type,
                         G* getter,
                         S* setter,
-                        int32_t num_conversions);
+                        int32_t num_conversions) REQUIRES_SHARED(Locks::mutator_lock_);
 
 // A convenience wrapper around |PerformConversions|, for the case where
 // the setter and getter are both ShadowFrame based.
-template <bool is_range> REQUIRES_SHARED(Locks::mutator_lock_)
+template <bool is_range>
 bool ConvertAndCopyArgumentsFromCallerFrame(Thread* self,
                                             Handle<mirror::MethodType> callsite_type,
                                             Handle<mirror::MethodType> callee_type,
@@ -127,7 +168,8 @@
                                             uint32_t first_src_reg,
                                             uint32_t first_dest_reg,
                                             const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                            ShadowFrame* callee_frame);
+                                            ShadowFrame* callee_frame)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
 // A convenience class that allows for iteration through a list of
 // input argument registers |arg| for non-range invokes or a list of
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b11dad8..7d7c1d7 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -424,6 +424,29 @@
   }
 }
 
+template<bool kUnchecked>
+void PointerArray::Memcpy(int32_t dst_pos,
+                          ObjPtr<PointerArray> src,
+                          int32_t src_pos,
+                          int32_t count,
+                          PointerSize ptr_size) {
+  DCHECK(!Runtime::Current()->IsActiveTransaction());  // Must not run inside a transaction.
+  DCHECK(!src.IsNull());
+  if (ptr_size == PointerSize::k64) {  // 64-bit pointers: treat both arrays as LongArray.
+    LongArray* l_this = (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this))
+                                    : AsLongArray());
+    LongArray* l_src = (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(src.Ptr()))
+                                   : src->AsLongArray());
+    l_this->Memcpy(dst_pos, l_src, src_pos, count);
+  } else {  // Otherwise treat both arrays as IntArray.
+    IntArray* i_this = (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this))
+                                   : AsIntArray());
+    IntArray* i_src = (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(src.Ptr()))
+                                  : src->AsIntArray());
+    i_this->Memcpy(dst_pos, i_src, src_pos, count);
+  }
+}
+
 template<typename T>
 inline void PrimitiveArray<T>::SetArrayClass(ObjPtr<Class> array_class) {
   CHECK(array_class_.IsNull());
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 994e9b2..19d300e 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -208,6 +208,17 @@
             typename Visitor>
   void Fixup(mirror::PointerArray* dest, PointerSize pointer_size, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Works like memcpy(), except we guarantee not to allow tearing of array values (i.e. using
+  // copies smaller than the element size). Arguments are assumed to be within the bounds of the
+  // arrays, and the arrays are assumed to be non-null. Cannot be called in an active transaction.
+  template<bool kUnchecked = false>
+  void Memcpy(int32_t dst_pos,
+              ObjPtr<PointerArray> src,
+              int32_t src_pos,
+              int32_t count,
+              PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 };
 
 }  // namespace mirror
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 9992a9e..5fdf8f3 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -65,8 +65,10 @@
       OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline ClassLoader* Class::GetClassLoader() {
-  return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_));
+  return GetFieldObject<ClassLoader, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, class_loader_));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -236,7 +238,7 @@
 template<VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption>
 inline PointerArray* Class::GetVTable() {
-  DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
+  DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
   return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
@@ -372,7 +374,7 @@
     // to access the field if the FieldId specifies an accessible subclass of the declaring
     // class rather than the declaring class itself.
     ObjPtr<DexCache> referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
-    uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetFieldId(field_idx).class_idx_;
+    dex::TypeIndex class_idx = referrer_dex_cache->GetDexFile()->GetFieldId(field_idx).class_idx_;
     // The referenced class has already been resolved with the field, but may not be in the dex
     // cache. Use LookupResolveType here to search the class table if it is not in the dex cache.
     // should be no thread suspension due to the class being resolved.
@@ -410,7 +412,7 @@
     // to access the method if the MethodId specifies an accessible subclass of the declaring
     // class rather than the declaring class itself.
     ObjPtr<DexCache> referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
-    uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetMethodId(method_idx).class_idx_;
+    dex::TypeIndex class_idx = referrer_dex_cache->GetDexFile()->GetMethodId(method_idx).class_idx_;
     // The referenced class has already been resolved with the method, but may not be in the dex
     // cache.
     ObjPtr<Class> dex_access_to = Runtime::Current()->GetClassLinker()->LookupResolvedType(
@@ -526,20 +528,18 @@
 template<VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption>
 inline IfTable* Class::GetIfTable() {
-  return GetFieldObject<IfTable, kVerifyFlags, kReadBarrierOption>(
-      OFFSET_OF_OBJECT_MEMBER(Class, iftable_));
+  ObjPtr<IfTable> ret = GetFieldObject<IfTable, kVerifyFlags, kReadBarrierOption>(IfTableOffset());
+  DCHECK(ret != nullptr) << PrettyClass(this);
+  return ret.Ptr();
 }
 
 inline int32_t Class::GetIfTableCount() {
-  ObjPtr<IfTable> iftable = GetIfTable();
-  if (iftable == nullptr) {
-    return 0;
-  }
-  return iftable->Count();
+  return GetIfTable()->Count();
 }
 
 inline void Class::SetIfTable(ObjPtr<IfTable> new_iftable) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), new_iftable);
+  DCHECK(new_iftable != nullptr) << PrettyClass(this);
+  SetFieldObject<false>(IfTableOffset(), new_iftable);
 }
 
 inline LengthPrefixedArray<ArtField>* Class::GetIFieldsPtr() {
@@ -896,7 +896,8 @@
   klass->SetClassSize(class_size_);
   klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
   klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
-  klass->SetDexTypeIndex(DexFile::kDexNoIndex16);  // Default to no valid type index.
+  klass->SetDexTypeIndex(dex::TypeIndex(DexFile::kDexNoIndex16));  // Default to no valid type
+                                                                   // index.
   // Default to force slow path until initialized.
   klass->SetObjectSizeAllocFastPath(std::numeric_limits<uint32_t>::max());
 }
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 6a357b3..0cfe29b 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "class_ext.h"
 #include "class_linker-inl.h"
 #include "class_loader.h"
 #include "class-inl.h"
@@ -29,6 +30,7 @@
 #include "method.h"
 #include "object_array-inl.h"
 #include "object-inl.h"
+#include "object_lock.h"
 #include "runtime.h"
 #include "thread.h"
 #include "throwable.h"
@@ -58,12 +60,49 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-inline void Class::SetVerifyError(ObjPtr<Object> error) {
-  CHECK(error != nullptr) << PrettyClass();
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+ClassExt* Class::GetExtData() {
+  return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+}
+
+ClassExt* Class::EnsureExtDataPresent(Thread* self) {
+  ObjPtr<ClassExt> existing(GetExtData());
+  if (!existing.IsNull()) {
+    return existing.Ptr();
+  }
+  StackHandleScope<3> hs(self);
+  // Handlerize 'this' since we are allocating here.
+  Handle<Class> h_this(hs.NewHandle(this));
+  // Clear exception so we can allocate.
+  Handle<Throwable> throwable(hs.NewHandle(self->GetException()));
+  self->ClearException();
+  // Allocate the ClassExt
+  Handle<ClassExt> new_ext(hs.NewHandle(ClassExt::Alloc(self)));
+  if (new_ext.Get() == nullptr) {
+    // OOM allocating the classExt.
+    // TODO Should we restore the suppressed exception?
+    self->AssertPendingOOMException();
+    return nullptr;
   } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+    MemberOffset ext_offset(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+    bool set;
+    // Set the ext_data_ field using CAS semantics.
+    if (Runtime::Current()->IsActiveTransaction()) {
+      set = h_this->CasFieldStrongSequentiallyConsistentObject<true>(ext_offset,
+                                                                     ObjPtr<ClassExt>(nullptr),
+                                                                     new_ext.Get());
+    } else {
+      set = h_this->CasFieldStrongSequentiallyConsistentObject<false>(ext_offset,
+                                                                      ObjPtr<ClassExt>(nullptr),
+                                                                      new_ext.Get());
+    }
+    ObjPtr<ClassExt> ret(set ? new_ext.Get() : h_this->GetExtData());
+    DCHECK(!set || h_this->GetExtData() == new_ext.Get());
+    CHECK(!ret.IsNull());
+    // Restore the exception if there was one.
+    if (throwable.Get() != nullptr) {
+      self->SetException(throwable.Get());
+    }
+    return ret.Ptr();
   }
 }
 
@@ -95,10 +134,16 @@
       }
     }
 
-    // Remember the current exception.
-    CHECK(self->GetException() != nullptr);
-    h_this->SetVerifyError(self->GetException());
+    ObjPtr<ClassExt> ext(h_this->EnsureExtDataPresent(self));
+    if (!ext.IsNull()) {
+      self->AssertPendingException();
+      ext->SetVerifyError(self->GetException());
+    } else {
+      self->AssertPendingOOMException();
+    }
+    self->AssertPendingException();
   }
+
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
     h_this->SetField32Volatile<true>(StatusOffset(), new_status);
@@ -878,7 +923,7 @@
   return &GetDexFile().GetClassDef(class_def_idx);
 }
 
-uint16_t Class::GetDirectInterfaceTypeIdx(uint32_t idx) {
+dex::TypeIndex Class::GetDirectInterfaceTypeIdx(uint32_t idx) {
   DCHECK(!IsPrimitive());
   DCHECK(!IsArrayClass());
   return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
@@ -902,10 +947,11 @@
     DCHECK(interfaces != nullptr);
     return interfaces->Get(idx);
   } else {
-    uint16_t type_idx = klass->GetDirectInterfaceTypeIdx(idx);
+    dex::TypeIndex type_idx = klass->GetDirectInterfaceTypeIdx(idx);
     ObjPtr<Class> interface = klass->GetDexCache()->GetResolvedType(type_idx);
     if (interface == nullptr) {
-      interface = Runtime::Current()->GetClassLinker()->ResolveType(klass->GetDexFile(), type_idx,
+      interface = Runtime::Current()->GetClassLinker()->ResolveType(klass->GetDexFile(),
+                                                                    type_idx,
                                                                     klass.Get());
       CHECK(interface != nullptr || self->IsExceptionPending());
     }
@@ -1085,10 +1131,12 @@
   return depth;
 }
 
-uint32_t Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
+dex::TypeIndex Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
   std::string temp;
   const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
-  return (type_id == nullptr) ? DexFile::kDexNoIndex : dex_file.GetIndexForTypeId(*type_id);
+  return (type_id == nullptr)
+      ? dex::TypeIndex(DexFile::kDexNoIndex16)
+      : dex_file.GetIndexForTypeId(*type_id);
 }
 
 template <PointerSize kPointerSize, bool kTransactionActive>
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 5793795..248c941 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "base/iteration_range.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "class_flags.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
@@ -49,6 +50,7 @@
 
 namespace mirror {
 
+class ClassExt;
 class ClassLoader;
 class Constructor;
 class DexCache;
@@ -561,7 +563,7 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(PointerSize pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 72;
+    uint32_t vtable_entries = Object::kVTableLength + 73;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
@@ -672,6 +674,8 @@
     return MemberOffset(OFFSETOF_MEMBER(Class, super_class_));
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ClassLoader* GetClassLoader() ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetClassLoader(ObjPtr<ClassLoader> new_cl) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -680,6 +684,10 @@
     return MemberOffset(OFFSETOF_MEMBER(Class, dex_cache_));
   }
 
+  static MemberOffset IfTableOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Class, iftable_));
+  }
+
   enum {
     kDumpClassFullDetail = 1,
     kDumpClassClassLoader = (1 << 1),
@@ -1126,10 +1134,13 @@
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  Object* GetVerifyError() REQUIRES_SHARED(Locks::mutator_lock_) {
-    // DCHECK(IsErroneous());
-    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_));
-  }
+  ClassExt* GetExtData() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the ExtData for this class, allocating one if necessary. This should be the only way
+  // to force ext_data_ to be set. No functions are available for changing an already set ext_data_
+  // since doing so is not allowed.
+  ClassExt* EnsureExtDataPresent(Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   uint16_t GetDexClassDefIndex() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_));
@@ -1140,16 +1151,17 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), class_def_idx);
   }
 
-  uint16_t GetDexTypeIndex() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_));
+  dex::TypeIndex GetDexTypeIndex() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return dex::TypeIndex(
+        static_cast<uint16_t>(GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_))));
   }
 
-  void SetDexTypeIndex(uint16_t type_idx) REQUIRES_SHARED(Locks::mutator_lock_) {
+  void SetDexTypeIndex(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx);
+    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx.index_);
   }
 
-  uint32_t FindTypeIndexInOtherDexFile(const DexFile& dex_file)
+  dex::TypeIndex FindTypeIndexInOtherDexFile(const DexFile& dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static Class* GetJavaLangClass() REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1190,7 +1202,7 @@
 
   ALWAYS_INLINE uint32_t NumDirectInterfaces() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  uint16_t GetDirectInterfaceTypeIdx(uint32_t idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  dex::TypeIndex GetDirectInterfaceTypeIdx(uint32_t idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
   static ObjPtr<Class> GetDirectInterface(Thread* self,
                                           Handle<Class> klass,
@@ -1318,8 +1330,6 @@
   ALWAYS_INLINE void SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetVerifyError(ObjPtr<Object> klass) REQUIRES_SHARED(Locks::mutator_lock_);
-
   template <bool throw_on_failure, bool use_referrers_cache>
   bool ResolvedFieldAccessTest(ObjPtr<Class> access_to,
                                ArtField* field,
@@ -1384,6 +1394,12 @@
   // runtime such as arrays and primitive classes).
   HeapReference<DexCache> dex_cache_;
 
+  // Extraneous class data that is not always needed. This field is allocated lazily and may
+  // only be set with 'this' locked. This is synchronized on 'this'.
+  // TODO(allight) We should probably synchronize it on something external or handle allocation in
+  // some other (safe) way to prevent possible deadlocks.
+  HeapReference<ClassExt> ext_data_;
+
   // The interface table (iftable_) contains pairs of a interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
   // pair for each interface we support directly, indirectly via superclass, or indirectly via a
@@ -1408,10 +1424,6 @@
   // check for interfaces and return null.
   HeapReference<Class> super_class_;
 
-  // If class verify fails, we must return same error on subsequent tries. We may store either
-  // the class of the error, or an actual instance of Throwable here.
-  HeapReference<Object> verify_error_;
-
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
new file mode 100644
index 0000000..259bbbe
--- /dev/null
+++ b/runtime/mirror/class_ext.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_ext.h"
+
+#include "art_method-inl.h"
+#include "base/casts.h"
+#include "base/enums.h"
+#include "class-inl.h"
+#include "dex_file-inl.h"
+#include "gc/accounting/card_table-inl.h"
+#include "object-inl.h"
+#include "object_array.h"
+#include "object_array-inl.h"
+#include "stack_trace_element.h"
+#include "utils.h"
+#include "well_known_classes.h"
+
+namespace art {
+namespace mirror {
+
+GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
+
+void ClassExt::SetObsoleteArrays(ObjPtr<PointerArray> methods,
+                                 ObjPtr<ObjectArray<DexCache>> dex_caches) {
+  DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
+      << "Obsolete arrays are set without synchronization!";
+  CHECK_EQ(methods.IsNull(), dex_caches.IsNull());
+  auto obsolete_dex_cache_off = OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_);
+  auto obsolete_methods_off = OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  SetFieldObject<false>(obsolete_dex_cache_off, dex_caches.Ptr());
+  SetFieldObject<false>(obsolete_methods_off, methods.Ptr());
+}
+
+// Grows both obsolete arrays by |increase| slots, keeping old entries. Returns false on OOM.
+// TODO: Updating these arrays is racy unless all threads are suspended while they are written.
+bool ClassExt::ExtendObsoleteArrays(Thread* self, uint32_t increase) {
+  DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
+      << "Obsolete arrays are set without synchronization!";
+  StackHandleScope<5> hs(self);
+  Handle<ClassExt> h_this(hs.NewHandle(this));
+  Handle<PointerArray> old_methods(hs.NewHandle(h_this->GetObsoleteMethods()));
+  Handle<ObjectArray<DexCache>> old_dex_caches(hs.NewHandle(h_this->GetObsoleteDexCaches()));
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  size_t new_len;
+  if (old_methods.Get() == nullptr) {
+    CHECK(old_dex_caches.Get() == nullptr);
+    new_len = increase;
+  } else {
+    CHECK_EQ(old_methods->GetLength(), old_dex_caches->GetLength());
+    new_len = increase + old_methods->GetLength();
+  }
+  Handle<PointerArray> new_methods(hs.NewHandle<PointerArray>(
+      cl->AllocPointerArray(self, new_len)));
+  if (new_methods.IsNull()) {
+    // Allocation of the methods array failed; an OOM exception must be pending.
+    self->AssertPendingOOMException();
+    return false;
+  }
+  Handle<ObjectArray<DexCache>> new_dex_caches(hs.NewHandle<ObjectArray<DexCache>>(
+      ObjectArray<DexCache>::Alloc(self,
+                                   cl->FindClass(self,
+                                                 "[Ljava/lang/DexCache;",
+                                                 ScopedNullHandle<ClassLoader>()),
+                                   new_len)));
+  if (new_dex_caches.IsNull()) {
+    // Allocation of the dex cache array failed; an OOM exception must be pending.
+    self->AssertPendingOOMException();
+    return false;
+  }
+
+  if (!old_methods.IsNull()) {
+    // Copy the old contents into the front of the new arrays.
+    new_methods->Memcpy(0,
+                        old_methods.Get(),
+                        0,
+                        old_methods->GetLength(),
+                        cl->GetImagePointerSize());
+    new_dex_caches->AsObjectArray<Object>()->AssignableCheckingMemcpy<false>(
+        0, old_dex_caches->AsObjectArray<Object>(), 0, old_dex_caches->GetLength(), false);
+  }
+  // Publish both new arrays together.
+  h_this->SetObsoleteArrays(new_methods.Get(), new_dex_caches.Get());
+
+  return true;
+}
+
+ClassExt* ClassExt::Alloc(Thread* self) {
+  DCHECK(dalvik_system_ClassExt_.Read() != nullptr);
+  return down_cast<ClassExt*>(dalvik_system_ClassExt_.Read()->AllocObject(self).Ptr());
+}
+
+void ClassExt::SetVerifyError(ObjPtr<Object> err) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_), err);
+  } else {
+    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_), err);
+  }
+}
+
+void ClassExt::SetClass(ObjPtr<Class> dalvik_system_ClassExt) {
+  CHECK(dalvik_system_ClassExt != nullptr);
+  dalvik_system_ClassExt_ = GcRoot<Class>(dalvik_system_ClassExt);
+}
+
+void ClassExt::ResetClass() {
+  CHECK(!dalvik_system_ClassExt_.IsNull());
+  dalvik_system_ClassExt_ = GcRoot<Class>(nullptr);
+}
+
+void ClassExt::VisitRoots(RootVisitor* visitor) {
+  dalvik_system_ClassExt_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
+}
+
+}  // namespace mirror
+}  // namespace art
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
new file mode 100644
index 0000000..9104631
--- /dev/null
+++ b/runtime/mirror/class_ext.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_CLASS_EXT_H_
+#define ART_RUNTIME_MIRROR_CLASS_EXT_H_
+
+#include "class-inl.h"
+
+#include "array.h"
+#include "dex_cache.h"
+#include "gc_root.h"
+#include "object.h"
+#include "object_array.h"
+#include "object_callbacks.h"
+#include "string.h"
+
+namespace art {
+
+struct ClassExtOffsets;
+
+namespace mirror {
+
+// C++ mirror of dalvik.system.ClassExt
+class MANAGED ClassExt : public Object {
+ public:
+  static uint32_t ClassSize(PointerSize pointer_size) {
+    uint32_t vtable_entries = Object::kVTableLength;
+    return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
+  }
+
+  // Size of an instance of dalvik.system.ClassExt.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(ClassExt);
+  }
+
+  void SetVerifyError(ObjPtr<Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  Object* GetVerifyError() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_));
+  }
+
+  ObjectArray<DexCache>* GetObsoleteDexCaches() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<DexCache>>(
+        OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_));
+  }
+
+  PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
+  }
+
+  void SetObsoleteArrays(ObjPtr<PointerArray> methods, ObjPtr<ObjectArray<DexCache>> dex_caches)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Extend the obsolete arrays by the given amount.
+  bool ExtendObsoleteArrays(Thread* self, uint32_t increase)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static void SetClass(ObjPtr<Class> dalvik_system_ClassExt);
+  static void ResetClass();
+  static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static ClassExt* Alloc(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<ObjectArray<DexCache>> obsolete_dex_caches_;
+
+  HeapReference<PointerArray> obsolete_methods_;
+
+  HeapReference<DexCache> original_dex_cache_;
+
+  // The saved verification error of this class.
+  HeapReference<Object> verify_error_;
+
+  static GcRoot<Class> dalvik_system_ClassExt_;
+
+  friend struct art::ClassExtOffsets;  // for verifying offset information
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ClassExt);
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_CLASS_EXT_H_
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index df3865b..be8815a 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -40,13 +40,14 @@
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
 
-inline mirror::String* DexCache::GetResolvedString(uint32_t string_idx) {
-  DCHECK_LT(string_idx, GetDexFile()->NumStringIds());
-  return StringDexCachePair::Lookup(GetStrings(), string_idx, NumStrings()).Read();
+inline mirror::String* DexCache::GetResolvedString(dex::StringIndex string_idx) {
+  DCHECK_LT(string_idx.index_, GetDexFile()->NumStringIds());
+  return StringDexCachePair::Lookup(GetStrings(), string_idx.index_, NumStrings()).Read();
 }
 
-inline void DexCache::SetResolvedString(uint32_t string_idx, ObjPtr<mirror::String> resolved) {
-  StringDexCachePair::Assign(GetStrings(), string_idx, resolved.Ptr(), NumStrings());
+inline void DexCache::SetResolvedString(dex::StringIndex string_idx,
+                                        ObjPtr<mirror::String> resolved) {
+  StringDexCachePair::Assign(GetStrings(), string_idx.index_, resolved.Ptr(), NumStrings());
   Runtime* const runtime = Runtime::Current();
   if (UNLIKELY(runtime->IsActiveTransaction())) {
     DCHECK(runtime->IsAotCompiler());
@@ -56,12 +57,12 @@
   runtime->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
 
-inline void DexCache::ClearString(uint32_t string_idx) {
-  const uint32_t slot_idx = string_idx % NumStrings();
+inline void DexCache::ClearString(dex::StringIndex string_idx) {
+  const uint32_t slot_idx = string_idx.index_ % NumStrings();
   DCHECK(Runtime::Current()->IsAotCompiler());
   StringDexCacheType* slot = &GetStrings()[slot_idx];
   // This is racy but should only be called from the transactional interpreter.
-  if (slot->load(std::memory_order_relaxed).index == string_idx) {
+  if (slot->load(std::memory_order_relaxed).index == string_idx.index_) {
     StringDexCachePair cleared(
         nullptr,
         StringDexCachePair::InvalidIndexForSlot(slot_idx));
@@ -69,15 +70,15 @@
   }
 }
 
-inline Class* DexCache::GetResolvedType(uint32_t type_idx) {
-  DCHECK_LT(type_idx, NumResolvedTypes());
-  return GetResolvedTypes()[type_idx].Read();
+inline Class* DexCache::GetResolvedType(dex::TypeIndex type_idx) {
+  DCHECK_LT(type_idx.index_, NumResolvedTypes());
+  return GetResolvedTypes()[type_idx.index_].Read();
 }
 
-inline void DexCache::SetResolvedType(uint32_t type_idx, ObjPtr<Class> resolved) {
-  DCHECK_LT(type_idx, NumResolvedTypes());  // NOTE: Unchecked, i.e. not throwing AIOOB.
+inline void DexCache::SetResolvedType(dex::TypeIndex type_idx, ObjPtr<Class> resolved) {
+  DCHECK_LT(type_idx.index_, NumResolvedTypes());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   // TODO default transaction support.
-  GetResolvedTypes()[type_idx] = GcRoot<Class>(resolved);
+  GetResolvedTypes()[type_idx.index_] = GcRoot<Class>(resolved);
   // TODO: Fine-grained marking, so that we don't need to go through all arrays in full.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
@@ -174,14 +175,9 @@
     // tell the compiler to treat "Read" as a template rather than a field or
     // function. Otherwise, on encountering the "<" token, the compiler would
     // treat "Read" as a field.
-    T* before = source.object.template Read<kReadBarrierOption>();
-    // TODO(narayan): This additional GC root construction and assignment
-    // is unnecessary. We're already operating on a copy of the DexCachePair
-    // that's in the cache.
-    GcRoot<T> root(before);
-    visitor.VisitRootIfNonNull(root.AddressWithoutBarrier());
-    if (root.Read() != before) {
-      source.object = GcRoot<T>(root.Read());
+    T* const before = source.object.template Read<kReadBarrierOption>();
+    visitor.VisitRootIfNonNull(source.object.AddressWithoutBarrier());
+    if (source.object.template Read<kReadBarrierOption>() != before) {
       pairs[i].store(source, std::memory_order_relaxed);
     }
   }
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 1ae694d..cc4d01a 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -21,6 +21,7 @@
 #include "art_field.h"
 #include "art_method.h"
 #include "class.h"
+#include "dex_file_types.h"
 #include "object.h"
 #include "object_array.h"
 
@@ -213,19 +214,19 @@
     return OFFSET_OF_OBJECT_MEMBER(DexCache, num_resolved_method_types_);
   }
 
-  mirror::String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
+  mirror::String* GetResolvedString(dex::StringIndex string_idx) ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetResolvedString(uint32_t string_idx, ObjPtr<mirror::String> resolved) ALWAYS_INLINE
+  void SetResolvedString(dex::StringIndex string_idx, ObjPtr<mirror::String> resolved) ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Clear a string for a string_idx, used to undo string intern transactions to make sure
   // the string isn't kept live.
-  void ClearString(uint32_t string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  void ClearString(dex::StringIndex string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  Class* GetResolvedType(uint32_t type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  Class* GetResolvedType(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetResolvedType(uint32_t type_idx, ObjPtr<Class> resolved)
+  void SetResolvedType(dex::TypeIndex type_idx, ObjPtr<Class> resolved)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size)
diff --git a/runtime/mirror/emulated_stack_frame.cc b/runtime/mirror/emulated_stack_frame.cc
index 4ba71ea..d607040 100644
--- a/runtime/mirror/emulated_stack_frame.cc
+++ b/runtime/mirror/emulated_stack_frame.cc
@@ -173,13 +173,22 @@
 
   Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(
       mirror::ObjectArray<mirror::Object>::Alloc(self, array_class, refs_size)));
+  if (references.Get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
   Handle<ByteArray> stack_frame(hs.NewHandle(ByteArray::Alloc(self, frame_size)));
+  if (stack_frame.Get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
 
   // Step 4 : Perform argument conversions (if required).
   ShadowFrameGetter<is_range> getter(first_src_reg, arg, caller_frame);
   EmulatedStackFrameAccessor setter(references, stack_frame, stack_frame->GetLength());
   if (!PerformConversions<ShadowFrameGetter<is_range>, EmulatedStackFrameAccessor>(
-      self, from_types, to_types, &getter, &setter, num_method_params)) {
+          self, caller_type, callee_type, &getter, &setter, num_method_params)) {
     return nullptr;
   }
 
@@ -197,9 +206,8 @@
                                             Handle<mirror::MethodType> callee_type,
                                             const uint32_t first_dest_reg,
                                             ShadowFrame* callee_frame) {
-  StackHandleScope<4> hs(self);
-  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(GetType()->GetPTypes()));
-  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
+  ObjPtr<mirror::ObjectArray<mirror::Class>> from_types(GetType()->GetPTypes());
+  ObjPtr<mirror::ObjectArray<mirror::Class>> to_types(callee_type->GetPTypes());
 
   const int32_t num_method_params = from_types->GetLength();
   if (to_types->GetLength() != num_method_params) {
@@ -207,6 +215,8 @@
     return false;
   }
 
+  StackHandleScope<3> hs(self);
+  Handle<mirror::MethodType> frame_callsite_type(hs.NewHandle(GetType()));
   Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(GetReferences()));
   Handle<ByteArray> stack_frame(hs.NewHandle(GetStackFrame()));
 
@@ -214,7 +224,7 @@
   ShadowFrameSetter setter(callee_frame, first_dest_reg);
 
   return PerformConversions<EmulatedStackFrameAccessor, ShadowFrameSetter>(
-      self, from_types, to_types, &getter, &setter, num_method_params);
+      self, frame_callsite_type, callee_type, &getter, &setter, num_method_params);
 }
 
 void EmulatedStackFrame::GetReturnValue(Thread* self, JValue* value) {
diff --git a/runtime/mirror/emulated_stack_frame.h b/runtime/mirror/emulated_stack_frame.h
index 9fa06b7..d83a536 100644
--- a/runtime/mirror/emulated_stack_frame.h
+++ b/runtime/mirror/emulated_stack_frame.h
@@ -58,6 +58,10 @@
   // Sets the return value slot of this emulated stack frame to |value|.
   void SetReturnValue(Thread* self, const JValue& value) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::MethodType* GetType() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<MethodType>(OFFSET_OF_OBJECT_MEMBER(EmulatedStackFrame, type_));
+  }
+
   static void SetClass(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
   static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -67,10 +71,6 @@
     return static_class_.Read();
   }
 
-  mirror::MethodType* GetType() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<MethodType>(OFFSET_OF_OBJECT_MEMBER(EmulatedStackFrame, type_));
-  }
-
   mirror::ObjectArray<mirror::Object>* GetReferences() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetFieldObject<mirror::ObjectArray<mirror::Object>>(
         OFFSET_OF_OBJECT_MEMBER(EmulatedStackFrame, references_));
diff --git a/runtime/mirror/method_handle_impl.cc b/runtime/mirror/method_handle_impl.cc
index fdfaaa8..4f1c448 100644
--- a/runtime/mirror/method_handle_impl.cc
+++ b/runtime/mirror/method_handle_impl.cc
@@ -22,6 +22,12 @@
 namespace art {
 namespace mirror {
 
+mirror::Class* MethodHandle::StaticClass() {
+  mirror::Class* klass = MethodHandleImpl::StaticClass()->GetSuperClass();
+  DCHECK(klass->DescriptorEquals("Ljava/lang/invoke/MethodHandle;"));
+  return klass;
+}
+
 GcRoot<mirror::Class> MethodHandleImpl::static_class_;
 
 void MethodHandleImpl::SetClass(Class* klass) {
diff --git a/runtime/mirror/method_handle_impl.h b/runtime/mirror/method_handle_impl.h
index 7bf9c5b..5ea82b5 100644
--- a/runtime/mirror/method_handle_impl.h
+++ b/runtime/mirror/method_handle_impl.h
@@ -36,6 +36,10 @@
     return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, method_type_));
   }
 
+  mirror::MethodType* GetNominalType() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, nominal_type_));
+  }
+
   ArtField* GetTargetField() REQUIRES_SHARED(Locks::mutator_lock_) {
     return reinterpret_cast<ArtField*>(
         GetField64(OFFSET_OF_OBJECT_MEMBER(MethodHandle, art_field_or_method_)));
@@ -53,15 +57,17 @@
     return static_cast<MethodHandleKind>(handle_kind);
   }
 
+  static mirror::Class* StaticClass() REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
-  HeapReference<mirror::Object> as_type_cache_;
+  HeapReference<mirror::MethodType> nominal_type_;
   HeapReference<mirror::MethodType> method_type_;
   uint64_t art_field_or_method_;
   uint32_t handle_kind_;
 
  private:
-  static MemberOffset AsTypeCacheOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(MethodHandle, as_type_cache_));
+  static MemberOffset NominalTypeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(MethodHandle, nominal_type_));
   }
   static MemberOffset MethodTypeOffset() {
     return MemberOffset(OFFSETOF_MEMBER(MethodHandle, method_type_));
diff --git a/runtime/mirror/method_type.cc b/runtime/mirror/method_type.cc
index 0b52931..5d77a16 100644
--- a/runtime/mirror/method_type.cc
+++ b/runtime/mirror/method_type.cc
@@ -18,6 +18,7 @@
 
 #include "class-inl.h"
 #include "gc_root-inl.h"
+#include "method_handles.h"
 
 namespace art {
 namespace mirror {
@@ -43,28 +44,66 @@
   return mt.Get();
 }
 
-bool MethodType::IsExactMatch(mirror::MethodType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (GetRType() != other->GetRType()) {
-    return false;
-  }
-
+bool MethodType::IsExactMatch(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_) {
   mirror::ObjectArray<Class>* const p_types = GetPTypes();
   const int32_t params_length = p_types->GetLength();
 
-  mirror::ObjectArray<Class>* const other_p_types = other->GetPTypes();
-  if (params_length != other_p_types->GetLength()) {
+  mirror::ObjectArray<Class>* const target_p_types = target->GetPTypes();
+  if (params_length != target_p_types->GetLength()) {
+    return false;
+  }
+  for (int32_t i = 0; i < params_length; ++i) {
+    if (p_types->GetWithoutChecks(i) != target_p_types->GetWithoutChecks(i)) {
+      return false;
+    }
+  }
+  return GetRType() == target->GetRType();
+}
+
+bool MethodType::IsConvertible(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::ObjectArray<Class>* const p_types = GetPTypes();
+  const int32_t params_length = p_types->GetLength();
+
+  mirror::ObjectArray<Class>* const target_p_types = target->GetPTypes();
+  if (params_length != target_p_types->GetLength()) {
+    return false;
+  }
+
+  // Check the return type before the method handle is invoked; otherwise side
+  // effects of the invocation could be observable before the
+  // WrongMethodTypeException is raised.
+  if (!IsReturnTypeConvertible(target->GetRType(), GetRType())) {
     return false;
   }
 
   for (int32_t i = 0; i < params_length; ++i) {
-    if (p_types->GetWithoutChecks(i) != other_p_types->GetWithoutChecks(i)) {
+    if (!IsParameterTypeConvertible(p_types->GetWithoutChecks(i),
+                                    target_p_types->GetWithoutChecks(i))) {
       return false;
     }
   }
-
   return true;
 }
 
+std::string MethodType::PrettyDescriptor() REQUIRES_SHARED(Locks::mutator_lock_) {
+  std::ostringstream ss;
+  ss << "(";
+
+  mirror::ObjectArray<Class>* const p_types = GetPTypes();
+  const int32_t params_length = p_types->GetLength();
+  for (int32_t i = 0; i < params_length; ++i) {
+    ss << p_types->GetWithoutChecks(i)->PrettyDescriptor();
+    if (i != (params_length - 1)) {
+      ss << ", ";
+    }
+  }
+
+  ss << ")";
+  ss << GetRType()->PrettyDescriptor();
+
+  return ss.str();
+}
+
 void MethodType::SetClass(Class* klass) {
   CHECK(static_class_.IsNull()) << static_class_.Read() << " " << klass;
   CHECK(klass != nullptr);
diff --git a/runtime/mirror/method_type.h b/runtime/mirror/method_type.h
index 5b50409..9a98143 100644
--- a/runtime/mirror/method_type.h
+++ b/runtime/mirror/method_type.h
@@ -52,9 +52,17 @@
   static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Returns true iff. |other| is an exact match for this method type, i.e
+  // Returns true iff. |this| is an exact match for method type |target|, i.e
   // iff. they have the same return types and parameter types.
-  bool IsExactMatch(mirror::MethodType* other) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool IsExactMatch(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns true iff. |this| can be converted to match the |target| method type, i.e.
+  // iff. they have the same parameter count and convertible return and parameter types.
+  bool IsConvertible(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the pretty descriptor for this method type, suitable for display in
+  // exception messages and the like.
+  std::string PrettyDescriptor() REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   static MemberOffset FormOffset() {
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 3bf9d94..6d29ed3 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -135,25 +135,82 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline Object* Object::GetReadBarrierPointer() {
+inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
+#ifdef USE_BAKER_READ_BARRIER
+  CHECK(kUseBakerReadBarrier);
+#if defined(__arm__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;  // NOTE(review): uninitialized yet bound "+r" below; "=r" may suffice.
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);  // The `ldr` below hard-codes offset 4.
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %[result], [%[obj], #4]\n\t"
+      // XOR-ing the loaded word with itself makes `fad` always zero while still depending on
+      // the load, so neither the compiler nor the CPU can assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__aarch64__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;  // NOTE(review): uninitialized yet bound "+r" below; "=r" may suffice.
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);  // The `ldr` below hard-codes offset 4.
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %w[result], [%[obj], #4]\n\t"
+      // XOR-ing the loaded word with itself makes `fad` always zero while still depending on
+      // the load, so neither the compiler nor the CPU can assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__i386__) || defined(__x86_64__)
+  LockWord lw = GetLockWord(false);
+  // i386/x86_64 don't need a fake address dependency. Use a compiler fence to avoid compiler
+  // reordering.
+  *fake_address_dependency = 0;
+  std::atomic_signal_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#else
+  // mips/mips64: no implementation is provided here; reaching this point is fatal.
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+  UNUSED(fake_address_dependency);
+#endif
+#else  // !USE_BAKER_READ_BARRIER
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+  UNUSED(fake_address_dependency);
+#endif
+}
+
+inline uint32_t Object::GetReadBarrierState() {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  return reinterpret_cast<Object*>(GetLockWord(false).ReadBarrierState());
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
+  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
 }
 
-inline Object* Object::GetReadBarrierPointerAcquire() {
+inline uint32_t Object::GetReadBarrierStateAcquire() {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  return reinterpret_cast<Object*>(lw.ReadBarrierState());
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
@@ -169,48 +226,38 @@
 #endif
 }
 
-inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
+inline void Object::SetReadBarrierState(uint32_t rb_state) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
-  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
   LockWord lw = GetLockWord(false);
-  lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  lw.SetReadBarrierState(rb_state);
   SetLockWord(lw, false);
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  // We don't mark the card as this occurs as part of object allocation. Not all objects have
-  // backing cards, such as large objects.
-  SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr);
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
-  UNUSED(rb_ptr);
+  UNUSED(rb_state);
 #endif
 }
 
 template<bool kCasRelease>
-inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
+inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
-  DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
-  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
   LockWord expected_lw;
   LockWord new_lw;
   do {
     LockWord lw = GetLockWord(false);
-    if (UNLIKELY(reinterpret_cast<Object*>(lw.ReadBarrierState()) != expected_rb_ptr)) {
+    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
       // Lost the race.
       return false;
     }
     expected_lw = lw;
-    expected_lw.SetReadBarrierState(
-        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
+    expected_lw.SetReadBarrierState(expected_rb_state);
     new_lw = lw;
-    new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+    new_lw.SetReadBarrierState(rb_state);
     // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
     // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
     // an object and then changes the object from gray to black, the field updates (stores) will be
@@ -219,23 +266,8 @@
              CasLockWordWeakRelease(expected_lw, new_lw) :
              CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + offset.SizeValue();
-  Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-  HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_rb_ptr));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(rb_ptr));
-  do {
-    if (UNLIKELY(atomic_rb_ptr->LoadRelaxed() != expected_ref.reference_)) {
-      // Lost the race.
-      return false;
-    }
-  } while (!atomic_rb_ptr->CompareExchangeWeakSequentiallyConsistent(expected_ref.reference_,
-                                                                     new_ref.reference_));
-  return true;
 #else
-  UNUSED(expected_rb_ptr, rb_ptr);
+  UNUSED(expected_rb_state, rb_state);
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
@@ -259,19 +291,12 @@
 }
 
 
-inline void Object::AssertReadBarrierPointer() const {
-  if (kUseBakerReadBarrier) {
-    Object* obj = const_cast<Object*>(this);
-    DCHECK(obj->GetReadBarrierPointer() == nullptr)
-        << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
-        << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  } else {
-    CHECK(kUseBrooksReadBarrier);
-    Object* obj = const_cast<Object*>(this);
-    DCHECK_EQ(obj, obj->GetReadBarrierPointer())
-        << "Bad Brooks pointer: obj=" << reinterpret_cast<void*>(obj)
-        << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  }
+inline void Object::AssertReadBarrierState() const {
+  CHECK(kUseBakerReadBarrier);  // GetReadBarrierState() is fatal without USE_BAKER_READ_BARRIER.
+  Object* obj = const_cast<Object*>(this);  // GetReadBarrierState() is not a const member.
+  DCHECK(obj->GetReadBarrierState() == ReadBarrier::WhiteState())
+      << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
+      << " rb_state=" << obj->GetReadBarrierState();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 8cfb60e..f5b9ab3 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -235,8 +235,6 @@
   }
   for (ObjPtr<Class> cur = c; cur != nullptr; cur = cur->GetSuperClass()) {
     for (ArtField& field : cur->GetIFields()) {
-      StackHandleScope<1> hs(Thread::Current());
-      Handle<Object> h_object(hs.NewHandle(new_value));
       if (field.GetOffset().Int32Value() == field_offset.Int32Value()) {
         CHECK_NE(field.GetTypeAsPrimitiveType(), Primitive::kPrimNot);
         // TODO: resolve the field type for moving GC.
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 886637b..67b5ddb 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -94,19 +94,22 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetClass(ObjPtr<Class> new_klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // TODO: Clean these up and change to return int32_t
-  Object* GetReadBarrierPointer() REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Get the read barrier pointer with release semantics, only supported for baker.
-  Object* GetReadBarrierPointerAcquire() REQUIRES_SHARED(Locks::mutator_lock_);
+  // Get the read barrier state with a fake address dependency.
+  // '*fake_address_dependency' will be set to 0.
+  ALWAYS_INLINE uint32_t GetReadBarrierState(uintptr_t* fake_address_dependency)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // This version does not offer any special mechanism to prevent load-load reordering.
+  ALWAYS_INLINE uint32_t GetReadBarrierState() REQUIRES_SHARED(Locks::mutator_lock_);
+  // Get the read barrier state with a load-acquire.
+  ALWAYS_INLINE uint32_t GetReadBarrierStateAcquire() REQUIRES_SHARED(Locks::mutator_lock_);
 
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
-  void SetReadBarrierPointer(Object* rb_ptr) REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetReadBarrierState(uint32_t rb_state) REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<bool kCasRelease = false>
-  ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
+  ALWAYS_INLINE bool AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE uint32_t GetMarkBit() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -114,7 +117,8 @@
   ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AssertReadBarrierPointer() const REQUIRES_SHARED(Locks::mutator_lock_);
+  // Assert that the read barrier state is in the default (white) state.
+  ALWAYS_INLINE void AssertReadBarrierState() const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
   // invoke-interface to detect incompatible interface types.
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 5fb9459..0fdf132 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -119,10 +119,10 @@
       OffsetOfElement(i), object);
 }
 
-template<class T>
+template<class T> template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) {
   DCHECK(CheckIsValidIndex(i));
-  return GetFieldObject<T>(OffsetOfElement(i));
+  return GetFieldObject<T, kVerifyFlags, kReadBarrierOption>(OffsetOfElement(i));
 }
 
 template<class T>
@@ -145,17 +145,53 @@
   const bool copy_forward = (src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count);
   if (copy_forward) {
     // Forward copy.
-    for (int i = 0; i < count; ++i) {
-      // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-      Object* obj = src->GetWithoutChecks(src_pos + i);
-      SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+    bool baker_non_gray_case = false;
+    if (kUseReadBarrier && kUseBakerReadBarrier) {
+      uintptr_t fake_address_dependency;
+      if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+        baker_non_gray_case = true;
+        DCHECK_EQ(fake_address_dependency, 0U);
+        src.Assign(reinterpret_cast<ObjectArray<T>*>(
+            reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+        for (int i = 0; i < count; ++i) {
+          // We can skip the RB here because 'src' isn't gray.
+          T* obj = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+              src_pos + i);
+          SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+        }
+      }
+    }
+    if (!baker_non_gray_case) {
+      for (int i = 0; i < count; ++i) {
+        // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+        T* obj = src->GetWithoutChecks(src_pos + i);
+        SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+      }
     }
   } else {
     // Backward copy.
-    for (int i = count - 1; i >= 0; --i) {
-      // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-      Object* obj = src->GetWithoutChecks(src_pos + i);
-      SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+    bool baker_non_gray_case = false;
+    if (kUseReadBarrier && kUseBakerReadBarrier) {
+      uintptr_t fake_address_dependency;
+      if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+        baker_non_gray_case = true;
+        DCHECK_EQ(fake_address_dependency, 0U);
+        src.Assign(reinterpret_cast<ObjectArray<T>*>(
+            reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+        for (int i = count - 1; i >= 0; --i) {
+          // We can skip the RB here because 'src' isn't gray.
+          T* obj = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+              src_pos + i);
+          SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+        }
+      }
+    }
+    if (!baker_non_gray_case) {
+      for (int i = count - 1; i >= 0; --i) {
+        // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+        T* obj = src->GetWithoutChecks(src_pos + i);
+        SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+      }
     }
   }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
@@ -184,10 +220,28 @@
   // TODO: Optimize this later?
   // We can't use memmove since it does not handle read barriers and may do by per byte copying.
   // See b/32012820.
-  for (int i = 0; i < count; ++i) {
-    // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-    T* obj = src->GetWithoutChecks(src_pos + i);
-    SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+  bool baker_non_gray_case = false;  // Set when the barrier-free fast path below did the copy.
+  if (kUseReadBarrier && kUseBakerReadBarrier) {
+    uintptr_t fake_address_dependency;
+    if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+      baker_non_gray_case = true;
+      DCHECK_EQ(fake_address_dependency, 0U);  // So the OR below keeps 'src' unchanged.
+      src.Assign(reinterpret_cast<ObjectArray<T>*>(
+          reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+      for (int i = 0; i < count; ++i) {
+        // We can skip the RB here because 'src' isn't gray.
+        T* obj = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+            src_pos + i);
+        SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+      }
+    }
+  }
+  if (!baker_non_gray_case) {
+    for (int i = 0; i < count; ++i) {
+      // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+      T* obj = src->GetWithoutChecks(src_pos + i);
+      SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+    }
+  }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
@@ -212,27 +266,62 @@
   Class* dst_class = GetClass()->GetComponentType();
   Class* lastAssignableElementClass = dst_class;
 
-  Object* o = nullptr;
+  T* o = nullptr;
   int i = 0;
-  for (; i < count; ++i) {
-    // The follow get operations force the objects to be verified.
-    // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-    o = src->GetWithoutChecks(src_pos + i);
-    if (o == nullptr) {
-      // Null is always assignable.
-      SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
-    } else {
-      // TODO: use the underlying class reference to avoid uncompression when not necessary.
-      Class* o_class = o->GetClass();
-      if (LIKELY(lastAssignableElementClass == o_class)) {
-        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
-      } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
-        lastAssignableElementClass = o_class;
-        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+  bool baker_non_gray_case = false;
+  if (kUseReadBarrier && kUseBakerReadBarrier) {
+    uintptr_t fake_address_dependency;
+    if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+      baker_non_gray_case = true;
+      DCHECK_EQ(fake_address_dependency, 0U);
+      src.Assign(reinterpret_cast<ObjectArray<T>*>(
+          reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+      for (; i < count; ++i) {
+        // The following get operations force the objects to be verified.
+        // We can skip the RB here because 'src' isn't gray.
+        o = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+            src_pos + i);
+        if (o == nullptr) {
+          // Null is always assignable.
+          SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
+        } else {
+          // TODO: use the underlying class reference to avoid uncompression when not necessary.
+          Class* o_class = o->GetClass();
+          if (LIKELY(lastAssignableElementClass == o_class)) {
+            SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+          } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
+            lastAssignableElementClass = o_class;
+            SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+          } else {
+            // Can't put this element into the array, break to perform write-barrier and throw
+            // exception.
+            break;
+          }
+        }
+      }
+    }
+  }
+  if (!baker_non_gray_case) {
+    for (; i < count; ++i) {
+      // The following get operations force the objects to be verified.
+      // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+      o = src->GetWithoutChecks(src_pos + i);
+      if (o == nullptr) {
+        // Null is always assignable.
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
       } else {
-        // Can't put this element into the array, break to perform write-barrier and throw
-        // exception.
-        break;
+        // TODO: use the underlying class reference to avoid uncompression when not necessary.
+        Class* o_class = o->GetClass();
+        if (LIKELY(lastAssignableElementClass == o_class)) {
+          SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+        } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
+          lastAssignableElementClass = o_class;
+          SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+        } else {
+          // Can't put this element into the array, break to perform write-barrier and throw
+          // exception.
+          break;
+        }
       }
     }
   }
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index e4e954e..b7a9561 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -72,6 +72,8 @@
   ALWAYS_INLINE void SetWithoutChecksAndWriteBarrier(int32_t i, ObjPtr<T> object)
       NO_THREAD_SAFETY_ANALYSIS;
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE T* GetWithoutChecks(int32_t i) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Copy src into this array (dealing with overlaps as memmove does) without assignability checks.
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 5bf254d..4b47f7f 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -313,7 +313,7 @@
   ArtMethod* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V", kRuntimePointerSize);
   const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId("[I");
   ASSERT_TRUE(type_id != nullptr);
-  uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
+  dex::TypeIndex type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
   Object* array = CheckAndAllocArrayFromCodeInstrumented(
       type_idx, 3, sort, Thread::Current(), false,
       Runtime::Current()->GetHeap()->GetCurrentAllocator());
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index d42bb92..6870fda 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -106,9 +106,7 @@
     string->SetCount(count_);
     const uint16_t* const src = src_array_->GetData() + offset_;
     const int32_t length = String::GetLengthFromCount(count_);
-    bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
-    DCHECK(!compressible || kUseStringCompression);
-    if (compressible) {
+    if (kUseStringCompression && String::IsCompressed(count_)) {
       for (int i = 0; i < length; ++i) {
         string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
       }
@@ -126,7 +124,8 @@
 // Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
 class SetStringCountAndValueVisitorFromString {
  public:
-  SetStringCountAndValueVisitorFromString(int32_t count, Handle<String> src_string,
+  SetStringCountAndValueVisitorFromString(int32_t count,
+                                          Handle<String> src_string,
                                           int32_t offset) :
     count_(count), src_string_(src_string), offset_(offset) {
   }
@@ -137,8 +136,7 @@
     ObjPtr<String> string = ObjPtr<String>::DownCast(obj);
     string->SetCount(count_);
     const int32_t length = String::GetLengthFromCount(count_);
-    bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
-    DCHECK(!compressible || kUseStringCompression);
+    bool compressible = kUseStringCompression && String::IsCompressed(count_);
     if (src_string_->IsCompressed()) {
       const uint8_t* const src = src_string_->GetValueCompressed() + offset_;
       memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t));
@@ -160,7 +158,7 @@
   const int32_t offset_;
 };
 
-inline String* String::Intern() {
+inline ObjPtr<String> String::Intern() {
   return Runtime::Current()->GetInternTable()->InternWeak(this);
 }
 
@@ -209,8 +207,7 @@
                              gc::AllocatorType allocator_type,
                              const PreFenceVisitor& pre_fence_visitor) {
   constexpr size_t header_size = sizeof(String);
-  const bool compressible = kUseStringCompression &&
-                            String::GetCompressionFlagFromCount(utf16_length_with_flag);
+  const bool compressible = kUseStringCompression && String::IsCompressed(utf16_length_with_flag);
   const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t);
   size_t length = String::GetLengthFromCount(utf16_length_with_flag);
   static_assert(sizeof(length) <= sizeof(size_t),
@@ -245,7 +242,7 @@
 
 template <bool kIsInstrumented>
 inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) {
-  const int32_t length_with_flag = String::GetFlaggedCount(0);
+  const int32_t length_with_flag = String::GetFlaggedCount(0, /* compressible */ true);
   SetStringCountVisitor visitor(length_with_flag);
   return Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
 }
@@ -255,10 +252,9 @@
                                           Handle<ByteArray> array, int32_t offset,
                                           int32_t high_byte, gc::AllocatorType allocator_type) {
   const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
-  const bool compressible = kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length)
-                                            && (high_byte == 0);
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(byte_length)
-                                                  : byte_length;
+  const bool compressible =
+      kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length) && (high_byte == 0);
+  const int32_t length_with_flag = String::GetFlaggedCount(byte_length, compressible);
   SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8);
   String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return string;
@@ -272,7 +268,7 @@
   DCHECK_GE(array->GetLength(), count);
   const bool compressible = kUseStringCompression &&
                             String::AllASCII<uint16_t>(array->GetData() + offset, count);
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count;
+  const int32_t length_with_flag = String::GetFlaggedCount(count, compressible);
   SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset);
   String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return new_string;
@@ -284,8 +280,7 @@
   const bool compressible = kUseStringCompression &&
       ((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset,
                                                                     string_length));
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(string_length)
-                                                  : string_length;
+  const int32_t length_with_flag = String::GetFlaggedCount(string_length, compressible);
   SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset);
   String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return new_string;
@@ -311,7 +306,7 @@
 template<typename MemoryType>
 bool String::AllASCII(const MemoryType* const chars, const int length) {
   for (int i = 0; i < length; ++i) {
-    if (chars[i] > 0x80) {
+    if (chars[i] >= 0x80) {
       return false;
     }
   }
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 4336aa1..0ab0bd6 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -95,8 +95,7 @@
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   const bool compressible = kUseStringCompression &&
       (string->IsCompressed() && string2->IsCompressed());
-  const int32_t length_with_flag = compressible ? String::GetFlaggedCount(length + length2)
-                                                : (length + length2);
+  const int32_t length_with_flag = String::GetFlaggedCount(length + length2, compressible);
 
   SetStringCountVisitor visitor(length_with_flag);
   ObjPtr<String> new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
@@ -132,8 +131,7 @@
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   const bool compressible = kUseStringCompression &&
                             String::AllASCII<uint16_t>(utf16_data_in, utf16_length);
-  int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
-                                            : utf16_length;
+  int32_t length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
   SetStringCountVisitor visitor(length_with_flag);
   ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
@@ -169,8 +167,7 @@
                                       int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   const bool compressible = kUseStringCompression && (utf16_length == utf8_length);
-  const int32_t utf16_length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
-                                                        : utf16_length;
+  const int32_t utf16_length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
   SetStringCountVisitor visitor(utf16_length_with_flag);
   ObjPtr<String> string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index a1b674a..95b6c3e 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -33,6 +33,10 @@
 
 // String Compression
 static constexpr bool kUseStringCompression = false;
+enum class StringCompressionFlag : uint32_t {
+    kCompressed = 0u,
+    kUncompressed = 1u
+};
 
 // C++ mirror of java.lang.String
 class MANAGED String FINAL : public Object {
@@ -78,7 +82,6 @@
   void SetCount(int32_t new_count) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Count is invariant so use non-transactional mode. Also disable check as we may run inside
     // a transaction.
-    DCHECK_LE(0, (new_count & INT32_MAX));
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
   }
 
@@ -93,7 +96,7 @@
 
   void SetCharAt(int32_t index, uint16_t c) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  String* Intern() REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
 
   template <bool kIsInstrumented>
   ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length,
@@ -175,7 +178,7 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsCompressed() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return kUseStringCompression && GetCompressionFlagFromCount(GetCount());
+    return kUseStringCompression && IsCompressed(GetCount());
   }
 
   bool IsValueNull() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -183,16 +186,27 @@
   template<typename MemoryType>
   static bool AllASCII(const MemoryType* const chars, const int length);
 
-  ALWAYS_INLINE static bool GetCompressionFlagFromCount(const int32_t count) {
-    return kUseStringCompression && ((count & (1u << 31)) != 0);
+  ALWAYS_INLINE static bool IsCompressed(int32_t count) {
+    return GetCompressionFlagFromCount(count) == StringCompressionFlag::kCompressed;
   }
 
-  ALWAYS_INLINE static int32_t GetLengthFromCount(const int32_t count) {
-    return kUseStringCompression ? (count & INT32_MAX) : count;
+  ALWAYS_INLINE static StringCompressionFlag GetCompressionFlagFromCount(int32_t count) {
+    return kUseStringCompression
+        ? static_cast<StringCompressionFlag>(static_cast<uint32_t>(count) & 1u)
+        : StringCompressionFlag::kUncompressed;
   }
 
-  ALWAYS_INLINE static int32_t GetFlaggedCount(const int32_t count) {
-    return kUseStringCompression ? (count | (1u << 31)) : count;
+  ALWAYS_INLINE static int32_t GetLengthFromCount(int32_t count) {
+    return kUseStringCompression ? static_cast<int32_t>(static_cast<uint32_t>(count) >> 1) : count;
+  }
+
+  ALWAYS_INLINE static int32_t GetFlaggedCount(int32_t length, bool compressible) {
+    return kUseStringCompression
+        ? static_cast<int32_t>((static_cast<uint32_t>(length) << 1) |
+                               (static_cast<uint32_t>(compressible
+                                                          ? StringCompressionFlag::kCompressed
+                                                          : StringCompressionFlag::kUncompressed)))
+        : length;
   }
 
   static Class* GetJavaLangString() REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index b866a63..ade4e87 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -104,7 +104,7 @@
     CHECK_EQ(array_len % 2, 0);
     const auto depth = array_len / 2;
     if (depth == 0) {
-      result += "(Throwable with empty stack trace)";
+      result += "(Throwable with empty stack trace)\n";
     } else {
       const PointerSize ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       for (int32_t i = 0; i < depth; ++i) {
@@ -124,7 +124,7 @@
       ObjPtr<ObjectArray<StackTraceElement>> ste_array =
           ObjPtr<ObjectArray<StackTraceElement>>::DownCast(stack_trace);
       if (ste_array->GetLength() == 0) {
-        result += "(Throwable with empty stack trace)";
+        result += "(Throwable with empty stack trace)\n";
       } else {
         for (int32_t i = 0; i < ste_array->GetLength(); ++i) {
           StackTraceElement* ste = ste_array->Get(i);
@@ -139,7 +139,7 @@
         }
       }
     } else {
-      result += "(Throwable with no stack trace)";
+      result += "(Throwable with no stack trace)\n";
     }
   }
   ObjPtr<Throwable> cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_));
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index dd32df6..8a01043 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -67,6 +67,15 @@
 
 // Set by the verifier for a method that could not be verified to follow structured locking.
 static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
+// Set to indicate that the ArtMethod is obsolete and has a different DexCache from its declaring
+// class.
+// TODO Might want to re-arrange some of these so that we can have obsolete + intrinsic methods.
+static constexpr uint32_t kAccObsoleteMethod =        0x04000000;  // method (runtime)
+
+// Set by the class linker for a method that has only one implementation for a
+// virtual call.
+static constexpr uint32_t kAccSingleImplementation =  0x08000000;  // method (runtime)
+
 static constexpr uint32_t kAccIntrinsic  =            0x80000000;  // method (runtime)
 
 // Special runtime-only flags.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index eb74fcf..e7de7e6 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -771,7 +771,7 @@
         return false;
       }
       // Can't deflate if our lock count is too high.
-      if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
+      if (static_cast<uint32_t>(monitor->lock_count_) > LockWord::kThinLockMaxCount) {
         return false;
       }
       // Deflate to a thin lock.
@@ -1330,7 +1330,6 @@
 }
 
 void MonitorList::BroadcastForNewMonitors() {
-  CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, monitor_list_lock_);
   monitor_add_condition_.Broadcast(self);
@@ -1341,6 +1340,9 @@
   MutexLock mu(self, monitor_list_lock_);
   while (UNLIKELY((!kUseReadBarrier && !allow_new_monitors_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     monitor_add_condition_.WaitHoldingLocks(self);
   }
   list_.push_front(m);
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index 4ee46dc..4fbfe47 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -401,14 +401,11 @@
   Thread* const self = Thread::Current();
   ThreadPool thread_pool("the pool", 2);
   ScopedObjectAccess soa(self);
-  StackHandleScope<3> hs(self);
+  StackHandleScope<1> hs(self);
   Handle<mirror::Object> obj1(
       hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
-  Handle<mirror::Object> obj2(
-      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
   {
     ObjectLock<mirror::Object> lock1(self, obj1);
-    ObjectLock<mirror::Object> lock2(self, obj1);
     {
       ObjectTryLock<mirror::Object> trylock(self, obj1);
       EXPECT_TRUE(trylock.Acquired());
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index df0849a..1a77072 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -413,33 +413,7 @@
 
   OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
                                       false /* load_executable */);
-
-  std::ostringstream status;
-  bool oat_file_exists = false;
-  bool odex_file_exists = false;
-  if (oat_file_assistant.OatFileExists()) {
-    oat_file_exists = true;
-    status << *oat_file_assistant.OatFileName() << " [compilation_filter=";
-    status << CompilerFilter::NameOfFilter(oat_file_assistant.OatFileCompilerFilter());
-    status << ", status=" << oat_file_assistant.OatFileStatus();
-  }
-
-  if (oat_file_assistant.OdexFileExists()) {
-    odex_file_exists = true;
-    if (oat_file_exists) {
-      status << "] ";
-    }
-    status << *oat_file_assistant.OdexFileName() << " [compilation_filter=";
-    status << CompilerFilter::NameOfFilter(oat_file_assistant.OdexFileCompilerFilter());
-    status << ", status=" << oat_file_assistant.OdexFileStatus();
-  }
-
-  if (!oat_file_exists && !odex_file_exists) {
-    status << "invalid[";
-  }
-
-  status << "]";
-  return env->NewStringUTF(status.str().c_str());
+  return env->NewStringUTF(oat_file_assistant.GetStatusDump().c_str());
 }
 
 static jint DexFile_getDexOptNeeded(JNIEnv* env,
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 8d85425..adf35b6 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -420,8 +420,10 @@
   }
 }
 
-static bool SetRuntimeStatValue(JNIEnv* env, jobjectArray result, VMDebugRuntimeStatId id,
-                                std::string value) {
+static bool SetRuntimeStatValue(JNIEnv* env,
+                                jobjectArray result,
+                                VMDebugRuntimeStatId id,
+                                const std::string& value) {
   ScopedLocalRef<jstring> jvalue(env, env->NewStringUTF(value.c_str()));
   if (jvalue.get() == nullptr) {
     return false;
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 866dc7f..3058df4 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -34,6 +34,7 @@
 #include "common_throws.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/allocator/dlmalloc.h"
 #include "gc/heap.h"
@@ -286,7 +287,7 @@
 
 // Based on ClassLinker::ResolveString.
 static void PreloadDexCachesResolveString(
-    Handle<mirror::DexCache> dex_cache, uint32_t string_idx, StringTable& strings)
+    Handle<mirror::DexCache> dex_cache, dex::StringIndex string_idx, StringTable& strings)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ObjPtr<mirror::String>  string = dex_cache->GetResolvedString(string_idx);
   if (string != nullptr) {
@@ -305,7 +306,7 @@
 // Based on ClassLinker::ResolveType.
 static void PreloadDexCachesResolveType(Thread* self,
                                         ObjPtr<mirror::DexCache> dex_cache,
-                                        uint32_t type_idx)
+                                        dex::TypeIndex type_idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(type_idx);
   if (klass != nullptr) {
@@ -449,13 +450,13 @@
       continue;
     }
     for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
-      ObjPtr<mirror::String> string = dex_cache->GetResolvedString(j);
+      ObjPtr<mirror::String> string = dex_cache->GetResolvedString(dex::StringIndex(j));
       if (string != nullptr) {
         filled->num_strings++;
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedTypes(); j++) {
-      ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(j);
+      ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(dex::TypeIndex(j));
       if (klass != nullptr) {
         filled->num_types++;
       }
@@ -513,13 +514,13 @@
 
     if (kPreloadDexCachesStrings) {
       for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
-        PreloadDexCachesResolveString(dex_cache, j, strings);
+        PreloadDexCachesResolveString(dex_cache, dex::StringIndex(j), strings);
       }
     }
 
     if (kPreloadDexCachesTypes) {
       for (size_t j = 0; j < dex_cache->NumResolvedTypes(); j++) {
-        PreloadDexCachesResolveType(soa.Self(), dex_cache.Get(), j);
+        PreloadDexCachesResolveType(soa.Self(), dex_cache.Get(), dex::TypeIndex(j));
       }
     }
 
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index 71379a5..f1c350f 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -17,6 +17,7 @@
 #include "java_lang_DexCache.h"
 
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -53,14 +54,15 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(type_index), dex_cache->NumResolvedTypes());
-  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedType(type_index));
+  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedType(dex::TypeIndex(type_index)));
 }
 
 static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) {
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index));
+  return soa.AddLocalReference<jobject>(
+      dex_cache->GetResolvedString(dex::StringIndex(string_index)));
 }
 
 static void DexCache_setResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index,
@@ -68,7 +70,7 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(type_index), dex_cache->NumResolvedTypes());
-  dex_cache->SetResolvedType(type_index, soa.Decode<mirror::Class>(type));
+  dex_cache->SetResolvedType(dex::TypeIndex(type_index), soa.Decode<mirror::Class>(type));
 }
 
 static void DexCache_setResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index,
@@ -76,7 +78,7 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String>(string));
+  dex_cache->SetResolvedString(dex::StringIndex(string_index), soa.Decode<mirror::String>(string));
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index e5bab36..284d2d1 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -48,7 +48,7 @@
                                                           Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ObjPtr<mirror::Class> result;
-    if (cl->FindClassInPathClassLoader(soa, self, descriptor, hash, class_loader, &result)) {
+    if (cl->FindClassInBaseDexClassLoader(soa, self, descriptor, hash, class_loader, &result)) {
       return result;
     }
     return nullptr;
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index 1b128fb..73b81a7 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -136,7 +136,7 @@
   Handle<mirror::Class> parameter_class =
       hs.NewHandle(soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Parameter));
   ArtMethod* parameter_init =
-      soa.DecodeMethod(WellKnownClasses::java_lang_reflect_Parameter_init);
+      jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Parameter_init);
 
   // Mutable handles used in the loop below to ensure cleanup without scaling the number of
   // handles by the number of parameters.
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 5ab6097..c58854b 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -25,6 +25,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "dex_file-inl.h"
+#include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "sigchain.h"
@@ -33,7 +34,7 @@
 
 static const char* GetMethodShorty(JNIEnv* env, jmethodID mid) {
   ScopedObjectAccess soa(env);
-  ArtMethod* m = soa.DecodeMethod(mid);
+  ArtMethod* m = jni::DecodeArtMethod(mid);
   return m->GetShorty();
 }
 
@@ -90,14 +91,14 @@
   GetMethodShorty, GetNativeMethodCount, GetNativeMethods
 };
 
-bool LoadNativeBridge(std::string& native_bridge_library_filename) {
+bool LoadNativeBridge(const std::string& native_bridge_library_filename) {
   VLOG(startup) << "Runtime::Setup native bridge library: "
       << (native_bridge_library_filename.empty() ? "(empty)" : native_bridge_library_filename);
   return android::LoadNativeBridge(native_bridge_library_filename.c_str(),
                                    &native_bridge_art_callbacks_);
 }
 
-void PreInitializeNativeBridge(std::string dir) {
+void PreInitializeNativeBridge(const std::string& dir) {
   VLOG(startup) << "Runtime::Pre-initialize native bridge";
 #ifndef __APPLE__  // Mac OS does not support CLONE_NEWNS.
   if (unshare(CLONE_NEWNS) == -1) {
diff --git a/runtime/native_bridge_art_interface.h b/runtime/native_bridge_art_interface.h
index 090cddb..c86e5da 100644
--- a/runtime/native_bridge_art_interface.h
+++ b/runtime/native_bridge_art_interface.h
@@ -26,10 +26,10 @@
 // Mirror libnativebridge interface. Done to have the ART callbacks out of line, and not require
 // the system/core header file in other files.
 
-bool LoadNativeBridge(std::string& native_bridge_library_filename);
+bool LoadNativeBridge(const std::string& native_bridge_library_filename);
 
 // This is mostly for testing purposes, as in a full system this is called by Zygote code.
-void PreInitializeNativeBridge(std::string dir);
+void PreInitializeNativeBridge(const std::string& dir);
 
 void InitializeNativeBridge(JNIEnv* env, const char* instruction_set);
 
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 00ab577..5565565 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -256,7 +256,7 @@
   Drain(2U, prefix, pipe, os);
 }
 
-static bool RunCommand(std::string cmd) {
+static bool RunCommand(const std::string& cmd) {
   FILE* stream = popen(cmd.c_str(), "r");
   if (stream) {
     pclose(stream);
@@ -272,7 +272,7 @@
   if (code == 0) {
     return pc == 0;
   }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
   return code <= pc && pc <= (code + code_size);
 }
 
diff --git a/runtime/oat.h b/runtime/oat.h
index 814a493..8c84d42 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '9', '0', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index d7d0c4f..721fab9 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -44,7 +44,7 @@
   if (method_header == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const uint8_t*>(&method_header->code_size_) - begin_;
+  return reinterpret_cast<const uint8_t*>(method_header->GetCodeSizeAddr()) - begin_;
 }
 
 inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
@@ -52,7 +52,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FrameSizeInBytes();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FrameSizeInBytes();
 }
 
 inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
@@ -60,7 +60,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.CoreSpillMask();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().CoreSpillMask();
 }
 
 inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
@@ -68,7 +68,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FpSpillMask();
 }
 
 inline uint32_t OatFile::OatMethod::GetVmapTableOffset() const {
@@ -81,7 +81,7 @@
   if (method_header == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const uint8_t*>(&method_header->vmap_table_offset_) - begin_;
+  return reinterpret_cast<const uint8_t*>(method_header->GetVmapTableOffsetAddr()) - begin_;
 }
 
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
@@ -89,7 +89,7 @@
   if (code == nullptr) {
     return nullptr;
   }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
+  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetVmapTableOffset();
   if (UNLIKELY(offset == 0u)) {
     return nullptr;
   }
@@ -101,7 +101,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
 }
 
 inline uint32_t OatFile::OatMethod::GetCodeOffset() const {
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index c14b616..bdf8b0e 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -38,6 +38,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
+#include "dex_file_types.h"
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "gc_root.h"
@@ -718,7 +719,7 @@
     dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
 #else
     UNUSED(oat_file_begin);
-    static_assert(!kIsTargetBuild, "host_dlopen_handles_ will leak handles");
+    static_assert(!kIsTargetBuild || kIsTargetLinux, "host_dlopen_handles_ will leak handles");
     MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
     dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
     if (dlopen_handle_ != nullptr) {
@@ -1252,13 +1253,14 @@
     if (lookup_table_data_ + TypeLookupTable::RawDataLength(num_class_defs) > GetOatFile()->End()) {
       LOG(WARNING) << "found truncated lookup table in " << dex_file_location_;
     } else {
-      lookup_table_.reset(TypeLookupTable::Open(dex_file_pointer_,
-                                                lookup_table_data_,
-                                                num_class_defs));
+      lookup_table_ = TypeLookupTable::Open(dex_file_pointer_, lookup_table_data_, num_class_defs);
     }
   }
 }
 
+OatFile::OatDexFile::OatDexFile(std::unique_ptr<TypeLookupTable>&& lookup_table)
+    : lookup_table_(std::move(lookup_table)) {}
+
 OatFile::OatDexFile::~OatDexFile() {}
 
 size_t OatFile::OatDexFile::FileSize() const {
@@ -1342,7 +1344,7 @@
   }
   const DexFile::TypeId* type_id = dex_file.FindTypeId(descriptor);
   if (type_id != nullptr) {
-    uint16_t type_idx = dex_file.GetIndexForTypeId(*type_id);
+    dex::TypeIndex type_idx = dex_file.GetIndexForTypeId(*type_id);
     return dex_file.FindClassDef(type_idx);
   }
   return nullptr;
@@ -1540,7 +1542,7 @@
                                         bool* found) {
   DCHECK_NE(class_def_idx, DexFile::kDexNoIndex16);
   const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
-  if (oat_dex_file == nullptr) {
+  if (oat_dex_file == nullptr || oat_dex_file->GetOatFile() == nullptr) {
     *found = false;
     return OatFile::OatClass::Invalid();
   }
@@ -1548,4 +1550,8 @@
   return oat_dex_file->GetOatClass(class_def_idx);
 }
 
+void OatFile::OatDexFile::AssertAotCompiler() {
+  CHECK(Runtime::Current()->IsAotCompiler());
+}
+
 }  // namespace art
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 63a0e14..29add5b 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -384,7 +384,13 @@
   // Opens the DexFile referred to by this OatDexFile from within the containing OatFile.
   std::unique_ptr<const DexFile> OpenDexFile(std::string* error_msg) const;
 
+  // May return null if the OatDexFile only contains a type lookup table. This case only happens
+  // for the compiler to speed up compilation.
   const OatFile* GetOatFile() const {
+    // Avoid pulling in runtime.h in the header file.
+    if (kIsDebugBuild && oat_file_ == nullptr) {
+      AssertAotCompiler();
+    }
     return oat_file_;
   }
 
@@ -436,6 +442,9 @@
 
   ~OatDexFile();
 
+  // Create only with a type lookup table, used by the compiler to speed up compilation.
+  explicit OatDexFile(std::unique_ptr<TypeLookupTable>&& lookup_table);
+
  private:
   OatDexFile(const OatFile* oat_file,
              const std::string& dex_file_location,
@@ -446,14 +455,16 @@
              const uint32_t* oat_class_offsets_pointer,
              uint8_t* dex_cache_arrays);
 
-  const OatFile* const oat_file_;
+  static void AssertAotCompiler();
+
+  const OatFile* const oat_file_ = nullptr;
   const std::string dex_file_location_;
   const std::string canonical_dex_file_location_;
-  const uint32_t dex_file_location_checksum_;
-  const uint8_t* const dex_file_pointer_;
-  const uint8_t* lookup_table_data_;
-  const uint32_t* const oat_class_offsets_pointer_;
-  uint8_t* const dex_cache_arrays_;
+  const uint32_t dex_file_location_checksum_ = 0u;
+  const uint8_t* const dex_file_pointer_ = nullptr;
+  const uint8_t* lookup_table_data_ = nullptr;
+  const uint32_t* const oat_class_offsets_pointer_ = nullptr;
+  uint8_t* const dex_cache_arrays_ = nullptr;
   mutable std::unique_ptr<TypeLookupTable> lookup_table_;
 
   friend class OatFile;
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index ff00451..4d1e1ea 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -16,6 +16,8 @@
 
 #include "oat_file_assistant.h"
 
+#include <sstream>
+
 #include <sys/stat.h>
 #include "base/logging.h"
 #include "base/stringprintf.h"
@@ -34,15 +36,21 @@
 
 std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStatus status) {
   switch (status) {
-    case OatFileAssistant::kOatOutOfDate:
-      stream << "kOatOutOfDate";
+    case OatFileAssistant::kOatCannotOpen:
+      stream << "kOatCannotOpen";
+      break;
+    case OatFileAssistant::kOatDexOutOfDate:
+      stream << "kOatDexOutOfDate";
+      break;
+    case OatFileAssistant::kOatBootImageOutOfDate:
+      stream << "kOatBootImageOutOfDate";
+      break;
+    case OatFileAssistant::kOatRelocationOutOfDate:
+      stream << "kOatRelocationOutOfDate";
       break;
     case OatFileAssistant::kOatUpToDate:
       stream << "kOatUpToDate";
       break;
-    case OatFileAssistant::kOatNeedsRelocation:
-      stream << "kOatNeedsRelocation";
-      break;
     default:
       UNREACHABLE();
   }
@@ -60,7 +68,10 @@
                                    const char* oat_location,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : isa_(isa), load_executable_(load_executable), odex_(this), oat_(this) {
+    : isa_(isa),
+      load_executable_(load_executable),
+      odex_(this, /*is_oat_location*/ false),
+      oat_(this, /*is_oat_location*/ true) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
   dex_location_.assign(dex_location);
 
@@ -135,51 +146,13 @@
   return true;
 }
 
-OatFileAssistant::DexOptNeeded
-OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target, bool profile_changed) {
-  bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
-
-  // See if the oat file is in good shape as is.
-  bool oat_okay = oat_.CompilerFilterIsOkay(target, profile_changed);
-  if (oat_okay) {
-    if (compilation_desired) {
-      if (oat_.IsUpToDate()) {
-        return kNoDexOptNeeded;
-      }
-    } else {
-      if (!oat_.IsOutOfDate()) {
-        return kNoDexOptNeeded;
-      }
-    }
+int OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target, bool profile_changed) {
+  OatFileInfo& info = GetBestInfo();
+  DexOptNeeded dexopt_needed = info.GetDexOptNeeded(target, profile_changed);
+  if (info.IsOatLocation() || dexopt_needed == kDex2OatFromScratch) {
+    return dexopt_needed;
   }
-
-  // See if the odex file is in good shape as is.
-  bool odex_okay = odex_.CompilerFilterIsOkay(target, profile_changed);
-  if (odex_okay) {
-    if (compilation_desired) {
-      if (odex_.IsUpToDate()) {
-        return kNoDexOptNeeded;
-      }
-    } else {
-      if (!odex_.IsOutOfDate()) {
-        return kNoDexOptNeeded;
-      }
-    }
-  }
-
-  // See if we can get an up-to-date file by running patchoat.
-  if (compilation_desired) {
-    if (odex_okay && odex_.NeedsRelocation() && odex_.HasPatchInfo()) {
-      return kPatchOatNeeded;
-    }
-
-    if (oat_okay && oat_.NeedsRelocation() && oat_.HasPatchInfo()) {
-      return kSelfPatchOatNeeded;
-    }
-  }
-
-  // We can only run dex2oat if there are original dex files.
-  return HasOriginalDexFiles() ? kDex2OatNeeded : kNoDexOptNeeded;
+  return -dexopt_needed;
 }
 
 // Figure out the currently specified compile filter option in the runtime.
@@ -205,7 +178,7 @@
 }
 
 bool OatFileAssistant::IsUpToDate() {
-  return OatFileIsUpToDate() || OdexFileIsUpToDate();
+  return GetBestInfo().Status() == kOatUpToDate;
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate
@@ -215,59 +188,66 @@
     return kUpdateNotAttempted;
   }
 
-  switch (GetDexOptNeeded(target, profile_changed)) {
-    case kNoDexOptNeeded: return kUpdateSucceeded;
-    case kDex2OatNeeded: return GenerateOatFile(error_msg);
-    case kPatchOatNeeded: return RelocateOatFile(odex_.Filename(), error_msg);
-    case kSelfPatchOatNeeded: return RelocateOatFile(oat_.Filename(), error_msg);
+  OatFileInfo& info = GetBestInfo();
+  switch (info.GetDexOptNeeded(target, profile_changed)) {
+    case kNoDexOptNeeded:
+      return kUpdateSucceeded;
+
+    // TODO: For now, don't bother with all the different ways we can call
+    // dex2oat to generate the oat file. Always generate the oat file as if it
+    // were kDex2OatFromScratch.
+    case kDex2OatFromScratch:
+    case kDex2OatForBootImage:
+    case kDex2OatForRelocation:
+    case kDex2OatForFilter:
+      return GenerateOatFile(error_msg);
+
+    case kPatchoatForRelocation: {
+      return RelocateOatFile(info.Filename(), error_msg);
+    }
   }
   UNREACHABLE();
 }
 
 std::unique_ptr<OatFile> OatFileAssistant::GetBestOatFile() {
-  // The best oat files are, in descending order of bestness:
-  // 1. Properly relocated files. These may be opened executable.
-  // 2. Not out-of-date files that are already opened non-executable.
-  // 3. Not out-of-date files that we must reopen non-executable.
+  return GetBestInfo().ReleaseFileForUse();
+}
 
-  if (oat_.IsUpToDate()) {
-    return oat_.ReleaseFile();
+std::string OatFileAssistant::GetStatusDump() {
+  std::ostringstream status;
+  bool oat_file_exists = false;
+  bool odex_file_exists = false;
+  if (oat_.Status() != kOatCannotOpen) {
+    // If we can open the file, neither Filename nor GetFile should return null.
+    CHECK(oat_.Filename() != nullptr);
+    CHECK(oat_.GetFile() != nullptr);
+
+    oat_file_exists = true;
+    status << *oat_.Filename() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_.GetFile()->GetCompilerFilter());
+    status << ", status=" << oat_.Status();
   }
 
-  if (odex_.IsUpToDate()) {
-    return odex_.ReleaseFile();
-  }
+  if (odex_.Status() != kOatCannotOpen) {
+    // If we can open the file, neither Filename nor GetFile should return null.
+    CHECK(odex_.Filename() != nullptr);
+    CHECK(odex_.GetFile() != nullptr);
 
-  VLOG(oat) << "Oat File Assistant: No relocated oat file found,"
-    << " attempting to fall back to interpreting oat file instead.";
-
-  if (!oat_.IsOutOfDate() && !oat_.IsExecutable()) {
-    return oat_.ReleaseFile();
-  }
-
-  if (!odex_.IsOutOfDate() && !odex_.IsExecutable()) {
-    return odex_.ReleaseFile();
-  }
-
-  if (!oat_.IsOutOfDate()) {
-    load_executable_ = false;
-    oat_.Reset();
-    if (!oat_.IsOutOfDate()) {
-      CHECK(!oat_.IsExecutable());
-      return oat_.ReleaseFile();
+    odex_file_exists = true;
+    if (oat_file_exists) {
+      status << "] ";
     }
+    status << *odex_.Filename() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(odex_.GetFile()->GetCompilerFilter());
+    status << ", status=" << odex_.Status();
   }
 
-  if (!odex_.IsOutOfDate()) {
-    load_executable_ = false;
-    odex_.Reset();
-    if (!odex_.IsOutOfDate()) {
-      CHECK(!odex_.IsExecutable());
-      return odex_.ReleaseFile();
-    }
+  if (!oat_file_exists && !odex_file_exists) {
+    status << "invalid[";
   }
 
-  return std::unique_ptr<OatFile>();
+  status << "]";
+  return status.str();
 }
 
 std::vector<std::unique_ptr<const DexFile>> OatFileAssistant::LoadDexFiles(
@@ -317,62 +297,14 @@
   return has_original_dex_files_;
 }
 
-const std::string* OatFileAssistant::OdexFileName() {
-  return odex_.Filename();
-}
-
-bool OatFileAssistant::OdexFileExists() {
-  return odex_.Exists();
-}
-
 OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
   return odex_.Status();
 }
 
-bool OatFileAssistant::OdexFileIsOutOfDate() {
-  return odex_.IsOutOfDate();
-}
-
-bool OatFileAssistant::OdexFileNeedsRelocation() {
-  return odex_.NeedsRelocation();
-}
-
-bool OatFileAssistant::OdexFileIsUpToDate() {
-  return odex_.IsUpToDate();
-}
-
-CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
-  return odex_.CompilerFilter();
-}
-
-const std::string* OatFileAssistant::OatFileName() {
-  return oat_.Filename();
-}
-
-bool OatFileAssistant::OatFileExists() {
-  return oat_.Exists();
-}
-
 OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
   return oat_.Status();
 }
 
-bool OatFileAssistant::OatFileIsOutOfDate() {
-  return oat_.IsOutOfDate();
-}
-
-bool OatFileAssistant::OatFileNeedsRelocation() {
-  return oat_.NeedsRelocation();
-}
-
-bool OatFileAssistant::OatFileIsUpToDate() {
-  return oat_.IsUpToDate();
-}
-
-CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
-  return oat_.CompilerFilter();
-}
-
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
@@ -383,7 +315,7 @@
       dex_location_.c_str(), dex_checksum_pointer, &error_msg);
   if (oat_dex_file == nullptr) {
     VLOG(oat) << error_msg;
-    return kOatOutOfDate;
+    return kOatDexOutOfDate;
   }
 
   // Verify the dex checksums for any secondary multidex files
@@ -406,7 +338,7 @@
           << secondary_dex_location
           << ". Expected: " << expected_secondary_checksum
           << ", Actual: " << actual_secondary_checksum;
-        return kOatOutOfDate;
+        return kOatDexOutOfDate;
       }
     } else {
       // If we can't get the checksum for the secondary location, we assume
@@ -425,7 +357,7 @@
       VLOG(oat) << "No image for oat image checksum to match against.";
 
       if (HasOriginalDexFiles()) {
-        return kOatOutOfDate;
+        return kOatBootImageOutOfDate;
       }
 
       // If there is no original dex file to fall back to, grudgingly accept
@@ -439,7 +371,7 @@
     } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
         != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
-      return kOatOutOfDate;
+      return kOatBootImageOutOfDate;
     }
   } else {
     VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter;
@@ -450,7 +382,7 @@
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
         VLOG(oat) << "No image to check oat relocation against.";
-        return kOatNeedsRelocation;
+        return kOatRelocationOutOfDate;
       }
 
       // Verify the oat_data_begin recorded for the image in the oat file matches
@@ -462,7 +394,7 @@
           ": Oat file image oat_data_begin (" << oat_data_begin << ")"
           << " does not match actual image oat_data_begin ("
           << image_info->oat_data_begin << ")";
-        return kOatNeedsRelocation;
+        return kOatRelocationOutOfDate;
       }
 
       // Verify the oat_patch_delta recorded for the image in the oat file matches
@@ -473,7 +405,7 @@
           ": Oat file image patch delta (" << oat_patch_delta << ")"
           << " does not match actual image patch delta ("
           << image_info->patch_delta << ")";
-        return kOatNeedsRelocation;
+        return kOatRelocationOutOfDate;
       }
     } else {
       // Oat files compiled in PIC mode do not require relocation.
@@ -595,7 +527,7 @@
 
   std::vector<std::string> args;
   args.push_back("--dex-file=" + dex_location_);
-  args.push_back("--vdex-fd=" + std::to_string(vdex_file->Fd()));
+  args.push_back("--output-vdex-fd=" + std::to_string(vdex_file->Fd()));
   args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
   args.push_back("--oat-location=" + oat_file_name);
 
@@ -841,6 +773,11 @@
   return combined_image_checksum_;
 }
 
+OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() {
+  bool use_oat = oat_.IsUseable() || odex_.Status() == kOatCannotOpen;
+  return use_oat ? oat_ : odex_;
+}
+
 std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
   DCHECK(oat_file != nullptr);
   std::string art_file = ReplaceFileExtension(oat_file->GetLocation(), "art");
@@ -857,16 +794,29 @@
   return ret;
 }
 
-OatFileAssistant::OatFileInfo::OatFileInfo(OatFileAssistant* oat_file_assistant)
-  : oat_file_assistant_(oat_file_assistant)
+OatFileAssistant::OatFileInfo::OatFileInfo(OatFileAssistant* oat_file_assistant,
+                                           bool is_oat_location)
+  : oat_file_assistant_(oat_file_assistant), is_oat_location_(is_oat_location)
 {}
 
+bool OatFileAssistant::OatFileInfo::IsOatLocation() {
+  return is_oat_location_;
+}
+
 const std::string* OatFileAssistant::OatFileInfo::Filename() {
   return filename_provided_ ? &filename_ : nullptr;
 }
 
-bool OatFileAssistant::OatFileInfo::Exists() {
-  return GetFile() != nullptr;
+bool OatFileAssistant::OatFileInfo::IsUseable() {
+  switch (Status()) {
+    case kOatCannotOpen:
+    case kOatDexOutOfDate:
+    case kOatBootImageOutOfDate: return false;
+
+    case kOatRelocationOutOfDate:
+    case kOatUpToDate: return true;
+  }
+  UNREACHABLE();
 }
 
 OatFileAssistant::OatStatus OatFileAssistant::OatFileInfo::Status() {
@@ -874,7 +824,7 @@
     status_attempted_ = true;
     const OatFile* file = GetFile();
     if (file == nullptr) {
-      status_ = kOatOutOfDate;
+      status_ = kOatCannotOpen;
     } else {
       status_ = oat_file_assistant_->GivenOatFileStatus(*file);
       VLOG(oat) << file->GetLocation() << " is " << status_
@@ -884,22 +834,46 @@
   return status_;
 }
 
-bool OatFileAssistant::OatFileInfo::IsOutOfDate() {
-  return Status() == kOatOutOfDate;
-}
+OatFileAssistant::DexOptNeeded OatFileAssistant::OatFileInfo::GetDexOptNeeded(
+    CompilerFilter::Filter target, bool profile_changed) {
+  bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
+  bool filter_okay = CompilerFilterIsOkay(target, profile_changed);
 
-bool OatFileAssistant::OatFileInfo::NeedsRelocation() {
-  return Status() == kOatNeedsRelocation;
-}
+  if (filter_okay && Status() == kOatUpToDate) {
+    // The oat file is in good shape as is.
+    return kNoDexOptNeeded;
+  }
 
-bool OatFileAssistant::OatFileInfo::IsUpToDate() {
-  return Status() == kOatUpToDate;
-}
+  if (filter_okay && !compilation_desired && Status() == kOatRelocationOutOfDate) {
+    // If no compilation is desired, then it doesn't matter if the oat
+    // file needs relocation. It's in good shape as is.
+    return kNoDexOptNeeded;
+  }
 
-CompilerFilter::Filter OatFileAssistant::OatFileInfo::CompilerFilter() {
-  const OatFile* file = GetFile();
-  CHECK(file != nullptr);
-  return file->GetCompilerFilter();
+  if (filter_okay && Status() == kOatRelocationOutOfDate && HasPatchInfo()) {
+    return kPatchoatForRelocation;
+  }
+
+  if (oat_file_assistant_->HasOriginalDexFiles()) {
+    // Run dex2oat for relocation if we didn't have the patch info necessary
+    // to use patchoat.
+    if (filter_okay && Status() == kOatRelocationOutOfDate) {
+      return kDex2OatForRelocation;
+    }
+
+    if (IsUseable()) {
+      return kDex2OatForFilter;
+    }
+
+    if (Status() == kOatBootImageOutOfDate) {
+      return kDex2OatForBootImage;
+    }
+
+    return kDex2OatFromScratch;
+  }
+
+  // Otherwise there is nothing we can do, even if we want to.
+  return kNoDexOptNeeded;
 }
 
 const OatFile* OatFileAssistant::OatFileInfo::GetFile() {
@@ -967,5 +941,31 @@
   return std::move(file_);
 }
 
+std::unique_ptr<OatFile> OatFileAssistant::OatFileInfo::ReleaseFileForUse() {
+  if (Status() == kOatUpToDate) {
+    return ReleaseFile();
+  }
+
+  VLOG(oat) << "Oat File Assistant: No relocated oat file found,"
+    << " attempting to fall back to interpreting oat file instead.";
+
+  if (Status() == kOatRelocationOutOfDate && !IsExecutable()) {
+    return ReleaseFile();
+  }
+
+  if (Status() == kOatRelocationOutOfDate) {
+    // We are loading an oat file for runtime use that needs relocation.
+    // Reload the file non-executable to ensure that we interpret out of the
+    // dex code in the oat file rather than trying to execute the unrelocated
+    // compiled code.
+    oat_file_assistant_->load_executable_ = false;
+    Reset();
+    if (IsUseable()) {
+      CHECK(!IsExecutable());
+      return ReleaseFile();
+    }
+  }
+  return std::unique_ptr<OatFile>();
+}
 }  // namespace art
 
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 3f018dc..bed1edc 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -48,41 +48,56 @@
 class OatFileAssistant {
  public:
   enum DexOptNeeded {
-    // kNoDexOptNeeded - The code for this dex location is up to date and can
-    // be used as is.
+    // No dexopt should (or can) be done to update the apk/jar.
     // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0
     kNoDexOptNeeded = 0,
 
-    // kDex2OatNeeded - In order to make the code for this dex location up to
-    // date, dex2oat must be run on the dex file.
-    // Matches Java: dalvik.system.DexFile.DEX2OAT_NEEDED = 1
-    kDex2OatNeeded = 1,
+    // dex2oat should be run to update the apk/jar from scratch.
+    // Matches Java: dalvik.system.DexFile.DEX2OAT_FROM_SCRATCH = 1
+    kDex2OatFromScratch = 1,
 
-    // kPatchOatNeeded - In order to make the code for this dex location up to
-    // date, patchoat must be run on the odex file.
-    // Matches Java: dalvik.system.DexFile.PATCHOAT_NEEDED = 2
-    kPatchOatNeeded = 2,
+    // dex2oat should be run to update the apk/jar because the existing code
+    // is out of date with respect to the boot image.
+    // Matches Java: dalvik.system.DexFile.DEX2OAT_FOR_BOOT_IMAGE
+    kDex2OatForBootImage = 2,
 
-    // kSelfPatchOatNeeded - In order to make the code for this dex location
-    // up to date, patchoat must be run on the oat file.
-    // Matches Java: dalvik.system.DexFile.SELF_PATCHOAT_NEEDED = 3
-    kSelfPatchOatNeeded = 3,
+    // dex2oat should be run to update the apk/jar because the existing code
+    // is out of date with respect to the target compiler filter.
+    // Matches Java: dalvik.system.DexFile.DEX2OAT_FOR_FILTER
+    kDex2OatForFilter = 3,
+
+    // dex2oat should be run to update the apk/jar because the existing code
+    // is not relocated to match the boot image and does not have the
+    // necessary patch information to use patchoat.
+    // Matches Java: dalvik.system.DexFile.DEX2OAT_FOR_RELOCATION
+    kDex2OatForRelocation = 4,
+
+    // patchoat should be run to update the apk/jar.
+    // Matches Java: dalvik.system.DexFile.PATCHOAT_FOR_RELOCATION
+    kPatchoatForRelocation = 5,
   };
 
   enum OatStatus {
-    // kOatOutOfDate - An oat file is said to be out of date if the file does
-    // not exist, is out of date with respect to the dex file or boot image,
-    // or does not meet the target compilation type.
-    kOatOutOfDate,
+    // kOatCannotOpen - The oat file cannot be opened, because it does not
+    // exist, is unreadable, or otherwise corrupted.
+    kOatCannotOpen,
 
-    // kOatNeedsRelocation - An oat file is said to need relocation if the
-    // code is up to date, but not yet properly relocated for address space
-    // layout randomization (ASLR). In this case, the oat file is neither
-    // "out of date" nor "up to date".
-    kOatNeedsRelocation,
+    // kOatDexOutOfDate - The oat file is out of date with respect to the dex file.
+    kOatDexOutOfDate,
 
-    // kOatUpToDate - An oat file is said to be up to date if it is not out of
-    // date and has been properly relocated for the purposes of ASLR.
+    // kOatBootImageOutOfDate - The oat file is up to date with respect to the
+    // dex file, but is out of date with respect to the boot image.
+    kOatBootImageOutOfDate,
+
+    // kOatRelocationOutOfDate - The oat file is up to date with respect to
+    // the dex file and boot image, but contains compiled code that has the
+    // wrong patch delta with respect to the boot image. Patchoat should be
+    // run on the oat file to update the patch delta of the compiled code to
+    // match the boot image.
+    kOatRelocationOutOfDate,
+
+    // kOatUpToDate - The oat file is completely up to date with respect to
+    // the dex file and boot image.
     kOatUpToDate,
   };
 
@@ -142,8 +157,10 @@
   // dex location that is at least as good as an oat file generated with the
   // given compiler filter. profile_changed should be true to indicate the
   // profile has recently changed for this dex location.
-  DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter,
-                               bool profile_changed = false);
+  // Returns a non-negative status code if the status refers to the oat file in
+  // the oat location or is kDex2OatFromScratch. Returns a negated status code
+  // if the status refers to the oat file in the odex location.
+  int GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter, bool profile_changed = false);
 
   // Returns true if there is up-to-date code for this dex location,
   // irrespective of the compiler filter of the up-to-date code.
@@ -179,6 +196,10 @@
   // the OatFileAssistant object.
   std::unique_ptr<OatFile> GetBestOatFile();
 
+  // Returns a human readable description of the status of the code for the
+  // dex file. The returned description is for debugging purposes only.
+  std::string GetStatusDump();
+
   // Open and returns an image space associated with the oat file.
   static std::unique_ptr<gc::space::ImageSpace> OpenImageSpace(const OatFile* oat_file);
 
@@ -205,43 +226,16 @@
   // really an oat file. The odex file will often, but not always, have a
   // patch delta of 0 and need to be relocated before use for the purposes of
   // ASLR. The odex file is treated as if it were read-only.
-  // These methods return the location and status of the odex file for the dex
-  // location.
-  // Notes:
-  //  * OdexFileName may return null if the odex file name could not be
-  //    determined.
-  const std::string* OdexFileName();
-  bool OdexFileExists();
+  //
+  // Returns the status of the odex file for the dex location.
   OatStatus OdexFileStatus();
-  bool OdexFileIsOutOfDate();
-  bool OdexFileNeedsRelocation();
-  bool OdexFileIsUpToDate();
-  // Must only be called if the associated odex file exists, i.e, if
-  // |OdexFileExists() == true|.
-  CompilerFilter::Filter OdexFileCompilerFilter();
 
   // When the dex files is compiled on the target device, the oat file is the
   // result. The oat file will have been relocated to some
   // (possibly-out-of-date) offset for ASLR.
-  // These methods return the location and status of the target oat file for
-  // the dex location.
   //
-  // Notes:
-  //  * OatFileName may return null if the oat file name could not be
-  //    determined.
-  const std::string* OatFileName();
-  bool OatFileExists();
+  // Returns the status of the oat file for the dex location.
   OatStatus OatFileStatus();
-  bool OatFileIsOutOfDate();
-  bool OatFileNeedsRelocation();
-  bool OatFileIsUpToDate();
-  // Must only be called if the associated oat file exists, i.e, if
-  // |OatFileExists() == true|.
-  CompilerFilter::Filter OatFileCompilerFilter();
-
-  // Return the status for a given opened oat file with respect to the dex
-  // location.
-  OatStatus GivenOatFileStatus(const OatFile& file);
 
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
@@ -311,29 +305,39 @@
     // Initially the info is for no file in particular. It will treat the
     // file as out of date until Reset is called with a real filename to use
     // the cache for.
-    explicit OatFileInfo(OatFileAssistant* oat_file_assistant);
+    // Pass true for is_oat_location if the information associated with this
+    // OatFileInfo is for the oat location, as opposed to the odex location.
+    OatFileInfo(OatFileAssistant* oat_file_assistant, bool is_oat_location);
+
+    bool IsOatLocation();
 
     const std::string* Filename();
-    bool Exists();
+
+    // Returns true if this oat file can be used for running code. The oat
+    // file can be used for running code as long as it is not out of date with
+    // respect to the dex code or boot image. An oat file that is out of date
+    // with respect to relocation is considered useable, because it's possible
+    // to interpret the dex code rather than run the unrelocated compiled
+    // code.
+    bool IsUseable();
+
+    // Returns the status of this oat file.
     OatStatus Status();
-    bool IsOutOfDate();
-    bool NeedsRelocation();
-    bool IsUpToDate();
-    // Must only be called if the associated file exists, i.e, if
-    // |Exists() == true|.
-    CompilerFilter::Filter CompilerFilter();
+
+    // Return the DexOptNeeded value for this oat file with respect to the
+    // given target_compiler_filter.
+    // profile_changed should be true to indicate the profile has recently
+    // changed for this dex location.
+    // If patchoat is needed, this function returns kPatchoatForRelocation
+    // whether this info is for the oat location or the odex location.
+    DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter,
+                                 bool profile_changed);
 
     // Returns the loaded file.
     // Loads the file if needed. Returns null if the file failed to load.
     // The caller shouldn't clean up or free the returned pointer.
     const OatFile* GetFile();
 
-    // Returns true if the compiler filter used to generate the file is at
-    // least as good as the given target filter. profile_changed should be
-    // true to indicate the profile has recently changed for this dex
-    // location.
-    bool CompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
-
     // Returns true if the file is opened executable.
     bool IsExecutable();
 
@@ -348,6 +352,23 @@
     // file with the given filename.
     void Reset(const std::string& filename);
 
+    // Release the loaded oat file for runtime use.
+    // Returns null if the oat file hasn't been loaded or is out of date.
+    // Ensures the returned file is not loaded executable if it has unuseable
+    // compiled code.
+    //
+    // After this call, no other methods of the OatFileInfo should be
+    // called, because access to the loaded oat file has been taken away from
+    // the OatFileInfo object.
+    std::unique_ptr<OatFile> ReleaseFileForUse();
+
+   private:
+    // Returns true if the compiler filter used to generate the file is at
+    // least as good as the given target filter. profile_changed should be
+    // true to indicate the profile has recently changed for this dex
+    // location.
+    bool CompilerFilterIsOkay(CompilerFilter::Filter target, bool profile_changed);
+
     // Release the loaded oat file.
     // Returns null if the oat file hasn't been loaded.
     //
@@ -356,8 +377,8 @@
     // the OatFileInfo object.
     std::unique_ptr<OatFile> ReleaseFile();
 
-   private:
     OatFileAssistant* oat_file_assistant_;
+    const bool is_oat_location_;
 
     bool filename_provided_ = false;
     std::string filename_;
@@ -374,6 +395,13 @@
     bool file_released_ = false;
   };
 
+  // Return info for the best oat file.
+  OatFileInfo& GetBestInfo();
+
+  // Return the status for a given opened oat file with respect to the dex
+  // location.
+  OatStatus GivenOatFileStatus(const OatFile& file);
+
   // Returns the current image location.
   // Returns an empty string if the image location could not be retrieved.
   //
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index d18e946..5730cf2 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -49,9 +49,9 @@
   // Pre-Relocate the image to a known non-zero offset so we don't have to
   // deal with the runtime randomly relocating the image by 0 and messing up
   // the expected results of the tests.
-  bool PreRelocateImage(std::string* error_msg) {
+  bool PreRelocateImage(const std::string& image_location, std::string* error_msg) {
     std::string image;
-    if (!GetCachedImageFile(&image, error_msg)) {
+    if (!GetCachedImageFile(image_location, &image, error_msg)) {
       return false;
     }
 
@@ -60,7 +60,7 @@
 
     std::vector<std::string> argv;
     argv.push_back(patchoat);
-    argv.push_back("--input-image-location=" + GetImageLocation());
+    argv.push_back("--input-image-location=" + image_location);
     argv.push_back("--output-image-file=" + image);
     argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
     argv.push_back("--base-offset-delta=0x00008000");
@@ -69,8 +69,8 @@
 
   virtual void PreRuntimeCreate() {
     std::string error_msg;
-    ASSERT_TRUE(PreRelocateImage(&error_msg)) << error_msg;
-
+    ASSERT_TRUE(PreRelocateImage(GetImageLocation(), &error_msg)) << error_msg;
+    ASSERT_TRUE(PreRelocateImage(GetImageLocation2(), &error_msg)) << error_msg;
     UnreserveImageSpace();
   }
 
@@ -78,24 +78,32 @@
     ReserveImageSpace();
   }
 
-  // Generate a non-PIC odex file for the purposes of test.
-  // The generated odex file will be un-relocated.
-  void GenerateOdexForTest(const std::string& dex_location,
-                           const std::string& odex_location,
-                           CompilerFilter::Filter filter,
-                           bool pic = false,
-                           bool with_patch_info = true) {
-    // Temporarily redirect the dalvik cache so dex2oat doesn't find the
-    // relocated image file.
+  // Generate an oat file for the purposes of test.
+  void GenerateOatForTest(const std::string& dex_location,
+                          const std::string& oat_location,
+                          CompilerFilter::Filter filter,
+                          bool relocate,
+                          bool pic,
+                          bool with_patch_info,
+                          bool with_alternate_image) {
     std::string dalvik_cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA));
     std::string dalvik_cache_tmp = dalvik_cache + ".redirected";
-    ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno);
+
+    if (!relocate) {
+      // Temporarily redirect the dalvik cache so dex2oat doesn't find the
+      // relocated image file.
+      ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno);
+    }
 
     std::vector<std::string> args;
     args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + odex_location);
+    args.push_back("--oat-file=" + oat_location);
     args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
     args.push_back("--runtime-arg");
+
+    // Use -Xnorelocate regardless of the relocate argument.
+    // We control relocation by redirecting the dalvik cache when needed
+    // rather than use this flag.
     args.push_back("-Xnorelocate");
 
     if (pic) {
@@ -106,14 +114,22 @@
       args.push_back("--include-patch-information");
     }
 
+    std::string image_location = GetImageLocation();
+    if (with_alternate_image) {
+      args.push_back("--boot-image=" + GetImageLocation2());
+    }
+
     std::string error_msg;
     ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
-    ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno);
 
-    // Verify the odex file was generated as expected and really is
-    // unrelocated.
-    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
-                                                     odex_location.c_str(),
+    if (!relocate) {
+      // Restore the dalvik cache if needed.
+      ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno);
+    }
+
+    // Verify the oat file was generated as expected.
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(oat_location.c_str(),
+                                                     oat_location.c_str(),
                                                      nullptr,
                                                      nullptr,
                                                      false,
@@ -125,24 +141,59 @@
     EXPECT_EQ(with_patch_info, odex_file->HasPatchInfo());
     EXPECT_EQ(filter, odex_file->GetCompilerFilter());
 
-    if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
-      const std::vector<gc::space::ImageSpace*> image_spaces =
-        Runtime::Current()->GetHeap()->GetBootImageSpaces();
-      ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
-      const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
-      const OatHeader& oat_header = odex_file->GetOatHeader();
-      uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
-      EXPECT_EQ(combined_checksum, oat_header.GetImageFileLocationOatChecksum());
-      EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
-          oat_header.GetImageFileLocationOatDataBegin());
-      EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
+    std::unique_ptr<ImageHeader> image_header(
+            gc::space::ImageSpace::ReadImageHeader(image_location.c_str(),
+                                                   kRuntimeISA,
+                                                   &error_msg));
+    ASSERT_TRUE(image_header != nullptr) << error_msg;
+    const OatHeader& oat_header = odex_file->GetOatHeader();
+    uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
+
+    if (CompilerFilter::DependsOnImageChecksum(filter)) {
+      if (with_alternate_image) {
+        EXPECT_NE(combined_checksum, oat_header.GetImageFileLocationOatChecksum());
+      } else {
+        EXPECT_EQ(combined_checksum, oat_header.GetImageFileLocationOatChecksum());
+      }
     }
+
+    if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
+      if (relocate) {
+        EXPECT_EQ(reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin()),
+            oat_header.GetImageFileLocationOatDataBegin());
+        EXPECT_EQ(image_header->GetPatchDelta(), oat_header.GetImagePatchDelta());
+      } else {
+        EXPECT_NE(reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin()),
+            oat_header.GetImageFileLocationOatDataBegin());
+        EXPECT_NE(image_header->GetPatchDelta(), oat_header.GetImagePatchDelta());
+      }
+    }
+  }
+
+  // Generate a non-PIC odex file for the purposes of test.
+  // The generated odex file will be un-relocated.
+  void GenerateOdexForTest(const std::string& dex_location,
+                           const std::string& odex_location,
+                           CompilerFilter::Filter filter) {
+    GenerateOatForTest(dex_location,
+                       odex_location,
+                       filter,
+                       /*relocate*/false,
+                       /*pic*/false,
+                       /*with_patch_info*/true,
+                       /*with_alternate_image*/false);
   }
 
   void GeneratePicOdexForTest(const std::string& dex_location,
                               const std::string& odex_location,
                               CompilerFilter::Filter filter) {
-    GenerateOdexForTest(dex_location, odex_location, filter, true, false);
+    GenerateOatForTest(dex_location,
+                       odex_location,
+                       filter,
+                       /*relocate*/false,
+                       /*pic*/true,
+                       /*with_patch_info*/false,
+                       /*with_alternate_image*/false);
   }
 
   // Generate a non-PIC odex file without patch information for the purposes
@@ -150,7 +201,43 @@
   void GenerateNoPatchOdexForTest(const std::string& dex_location,
                                   const std::string& odex_location,
                                   CompilerFilter::Filter filter) {
-    GenerateOdexForTest(dex_location, odex_location, filter, false, false);
+    GenerateOatForTest(dex_location,
+                       odex_location,
+                       filter,
+                       /*relocate*/false,
+                       /*pic*/false,
+                       /*with_patch_info*/false,
+                       /*with_alternate_image*/false);
+  }
+
+  // Generate an oat file in the oat location.
+  void GenerateOatForTest(const char* dex_location,
+                          CompilerFilter::Filter filter,
+                          bool relocate,
+                          bool pic,
+                          bool with_patch_info,
+                          bool with_alternate_image) {
+    std::string oat_location;
+    std::string error_msg;
+    ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename(
+          dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg;
+    GenerateOatForTest(dex_location,
+                       oat_location,
+                       filter,
+                       relocate,
+                       pic,
+                       with_patch_info,
+                       with_alternate_image);
+  }
+
+  // Generate a standard oat file in the oat location.
+  void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
+    GenerateOatForTest(dex_location,
+                       filter,
+                       /*relocate*/true,
+                       /*pic*/false,
+                       /*with_patch_info*/false,
+                       /*with_alternate_image*/false);
   }
 
  private:
@@ -211,36 +298,6 @@
   }
 };
 
-// Generate an oat file for the purposes of test, as opposed to testing
-// generation of oat files.
-static void GenerateOatForTest(const char* dex_location, CompilerFilter::Filter filter) {
-  // Use an oat file assistant to find the proper oat location.
-  std::string oat_location;
-  std::string error_msg;
-  ASSERT_TRUE(OatFileAssistant::DexLocationToOatFilename(
-        dex_location, kRuntimeISA, &oat_location, &error_msg)) << error_msg;
-
-  std::vector<std::string> args;
-  args.push_back("--dex-file=" + std::string(dex_location));
-  args.push_back("--oat-file=" + oat_location);
-  args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
-  args.push_back("--runtime-arg");
-  args.push_back("-Xnorelocate");
-  ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
-
-  // Verify the oat file was generated as expected.
-  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location.c_str(),
-                                                  oat_location.c_str(),
-                                                  nullptr,
-                                                  nullptr,
-                                                  false,
-                                                  /*low_4gb*/false,
-                                                  dex_location,
-                                                  &error_msg));
-  ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
-  EXPECT_EQ(filter, oat_file->GetCompilerFilter());
-}
-
 // Case: We have a DEX file, but no OAT file for it.
 // Expect: The status is kDex2OatNeeded.
 TEST_F(OatFileAssistantTest, DexNoOat) {
@@ -249,26 +306,18 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OdexFileStatus());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -307,17 +356,11 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
   EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
@@ -337,17 +380,12 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
 }
 
 // Case: We have a DEX file and speed-profile OAT file for it.
@@ -364,19 +402,13 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
   EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
@@ -416,7 +448,7 @@
   Copy(GetMultiDexSrc2(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false));
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
@@ -453,10 +485,10 @@
   EXPECT_EQ(2u, dex_files.size());
 }
 
-// Case: We have a DEX file and out-of-date OAT file.
-// Expect: The status is kDex2OatNeeded.
-TEST_F(OatFileAssistantTest, OatOutOfDate) {
-  std::string dex_location = GetScratchDir() + "/OatOutOfDate.jar";
+// Case: We have a DEX file and an OAT file out of date with respect to the
+// dex checksum.
+TEST_F(OatFileAssistantTest, OatDexOutOfDate) {
+  std::string dex_location = GetScratchDir() + "/OatDexOutOfDate.jar";
 
   // We create a dex, generate an oat for it, then overwrite the dex with a
   // different dex to make the oat out of date.
@@ -465,18 +497,68 @@
   Copy(GetDexSrc2(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and an OAT file out of date with respect to the
+// boot image.
+TEST_F(OatFileAssistantTest, OatImageOutOfDate) {
+  std::string dex_location = GetScratchDir() + "/OatImageOutOfDate.jar";
+
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOatForTest(dex_location.c_str(),
+                     CompilerFilter::kSpeed,
+                     /*relocate*/true,
+                     /*pic*/false,
+                     /*with_patch_info*/false,
+                     /*with_alternate_image*/true);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+  EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatBootImageOutOfDate, oat_file_assistant.OatFileStatus());
+  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
+}
+
+// Case: We have a DEX file and a verify-at-runtime OAT file out of date with
+// respect to the boot image.
+// It shouldn't matter that the OAT file is out of date, because it is
+// verify-at-runtime.
+TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) {
+  std::string dex_location = GetScratchDir() + "/OatVerifyAtRuntimeImageOutOfDate.jar";
+
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOatForTest(dex_location.c_str(),
+                     CompilerFilter::kVerifyAtRuntime,
+                     /*relocate*/true,
+                     /*pic*/false,
+                     /*with_patch_info*/false,
+                     /*with_alternate_image*/true);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
+      oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -495,17 +577,12 @@
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+  EXPECT_EQ(-OatFileAssistant::kPatchoatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // We should still be able to get the non-executable odex file to run from.
@@ -529,16 +606,12 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+  EXPECT_EQ(-OatFileAssistant::kPatchoatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Make the oat file up to date.
@@ -551,12 +624,8 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Verify we can load the dex files from it.
@@ -590,19 +659,14 @@
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+  EXPECT_EQ(-OatFileAssistant::kPatchoatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,  // Can't run dex2oat because dex file is stripped.
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Make the oat file up to date.
@@ -617,14 +681,8 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Verify we can load the dex files from it.
@@ -654,13 +712,8 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Make the oat file up to date. This should have no effect.
@@ -673,13 +726,8 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -699,20 +747,14 @@
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-  EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded,
+  EXPECT_EQ(OatFileAssistant::kPatchoatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // Make the oat file up to date.
@@ -725,14 +767,8 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -757,7 +793,7 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
       oat_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   // Make the oat file up to date.
@@ -778,7 +814,7 @@
 
 // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and
 // OAT files both have patch delta of 0.
-// Expect: It shouldn't crash, and status is kPatchOatNeeded.
+// Expect: It shouldn't crash, and status is kPatchoatForRelocation.
 TEST_F(OatFileAssistantTest, OdexOatOverlap) {
   std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar";
   std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex";
@@ -796,16 +832,15 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
       oat_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded,
+  // kPatchoatForRelocation is expected rather than -kPatchoatForRelocation
+  // based on the assumption that the oat location is more up-to-date than the odex
+  // location, even if they both need relocation.
+  EXPECT_EQ(OatFileAssistant::kPatchoatForRelocation,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatRelocationOutOfDate, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
   // Things aren't relocated, so it should fall back to interpreted.
@@ -833,16 +868,12 @@
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -861,16 +892,12 @@
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
@@ -958,7 +985,7 @@
 
   // Verify it didn't create an oat in the default location.
   OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_FALSE(ofm.OatFileExists());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus());
 }
 
 // Case: We have a DEX file but can't write the oat file.
@@ -999,7 +1026,7 @@
 
 // Turn an absolute path into a path relative to the current working
 // directory.
-static std::string MakePathRelative(std::string target) {
+static std::string MakePathRelative(const std::string& target) {
   char buf[MAXPATHLEN];
   std::string cwd = getcwd(buf, MAXPATHLEN);
 
@@ -1043,14 +1070,10 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
 }
 
 // Case: Very short, non-existent Dex location.
@@ -1063,12 +1086,8 @@
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
 
   // Trying to make it up to date should have no effect.
@@ -1087,16 +1106,12 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus());
 }
 
 // A task to generate a dex location. Used by the RaceToGenerate test.
@@ -1226,7 +1241,7 @@
       oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly));
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded,
+  EXPECT_EQ(OatFileAssistant::kDex2OatForFilter,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
 
   Runtime::Current()->AddCompilerOption("--compiler-filter=speed");
@@ -1263,6 +1278,15 @@
 // Verify the dexopt status values from dalvik.system.DexFile
 // match the OatFileAssistant::DexOptStatus values.
 TEST_F(OatFileAssistantTest, DexOptStatusValues) {
+  std::pair<OatFileAssistant::DexOptNeeded, const char*> mapping[] = {
+    {OatFileAssistant::kNoDexOptNeeded, "NO_DEXOPT_NEEDED"},
+    {OatFileAssistant::kDex2OatFromScratch, "DEX2OAT_FROM_SCRATCH"},
+    {OatFileAssistant::kDex2OatForBootImage, "DEX2OAT_FOR_BOOT_IMAGE"},
+    {OatFileAssistant::kDex2OatForFilter, "DEX2OAT_FOR_FILTER"},
+    {OatFileAssistant::kDex2OatForRelocation, "DEX2OAT_FOR_RELOCATION"},
+    {OatFileAssistant::kPatchoatForRelocation, "PATCHOAT_FOR_RELOCATION"}
+  };
+
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<1> hs(soa.Self());
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
@@ -1271,35 +1295,16 @@
   ASSERT_FALSE(dexfile.Get() == nullptr);
   linker->EnsureInitialized(soa.Self(), dexfile, true, true);
 
-  ArtField* no_dexopt_needed = mirror::Class::FindStaticField(
-      soa.Self(), dexfile, "NO_DEXOPT_NEEDED", "I");
-  ASSERT_FALSE(no_dexopt_needed == nullptr);
-  EXPECT_EQ(no_dexopt_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, no_dexopt_needed->GetInt(dexfile.Get()));
-
-  ArtField* dex2oat_needed = mirror::Class::FindStaticField(
-      soa.Self(), dexfile, "DEX2OAT_NEEDED", "I");
-  ASSERT_FALSE(dex2oat_needed == nullptr);
-  EXPECT_EQ(dex2oat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, dex2oat_needed->GetInt(dexfile.Get()));
-
-  ArtField* patchoat_needed = mirror::Class::FindStaticField(
-      soa.Self(), dexfile, "PATCHOAT_NEEDED", "I");
-  ASSERT_FALSE(patchoat_needed == nullptr);
-  EXPECT_EQ(patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
-  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, patchoat_needed->GetInt(dexfile.Get()));
-
-  ArtField* self_patchoat_needed = mirror::Class::FindStaticField(
-      soa.Self(), dexfile, "SELF_PATCHOAT_NEEDED", "I");
-  ASSERT_FALSE(self_patchoat_needed == nullptr);
-  EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
-  EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get()));
+  for (std::pair<OatFileAssistant::DexOptNeeded, const char*> field : mapping) {
+    ArtField* art_field = mirror::Class::FindStaticField(
+        soa.Self(), dexfile, field.second, "I");
+    ASSERT_FALSE(art_field == nullptr);
+    EXPECT_EQ(art_field->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+    EXPECT_EQ(field.first, art_field->GetInt(dexfile.Get()));
+  }
 }
 
 // TODO: More Tests:
-//  * Image checksum change is out of date for kIntepretOnly, but not
-//    kVerifyAtRuntime. But target of kVerifyAtRuntime still says current
-//    kInterpretOnly is out of date.
 //  * Test class linker falls back to unquickened dex for DexNoOat
 //  * Test class linker falls back to unquickened dex for MultiDexNoOat
 //  * Test using secondary isa
@@ -1313,5 +1318,4 @@
 //    because it's unrelocated and no dex2oat
 //  * Test unrelocated specific target compilation type can be relocated to
 //    make it up to date.
-
 }  // namespace art
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 68f71f7..5641459 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -28,6 +28,7 @@
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
+#include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "oat_file_assistant.h"
 #include "obj_ptr-inl.h"
@@ -224,9 +225,10 @@
   }
 }
 
+template <typename T>
 static void IterateOverJavaDexFile(ObjPtr<mirror::Object> dex_file,
                                    ArtField* const cookie_field,
-                                   std::function<bool(const DexFile*)> fn)
+                                   const T& fn)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (dex_file != nullptr) {
     mirror::LongArray* long_array = cookie_field->GetObject(dex_file)->AsLongArray();
@@ -247,26 +249,27 @@
   }
 }
 
+template <typename T>
 static void IterateOverPathClassLoader(
-    ScopedObjectAccessAlreadyRunnable& soa,
     Handle<mirror::ClassLoader> class_loader,
     MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements,
-    std::function<bool(const DexFile*)> fn) REQUIRES_SHARED(Locks::mutator_lock_) {
+    const T& fn) REQUIRES_SHARED(Locks::mutator_lock_) {
   // Handle this step.
   // Handle as if this is the child PathClassLoader.
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-        GetObject(dex_path_list);
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
     if (dex_elements_obj != nullptr) {
@@ -323,7 +326,7 @@
       hs.NewHandle<mirror::ObjectArray<mirror::Object>>(nullptr));
   Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
 
-  IterateOverPathClassLoader(soa, h_class_loader, dex_elements, GetDexFilesFn);
+  IterateOverPathClassLoader(h_class_loader, dex_elements, GetDexFilesFn);
 
   return true;
 }
@@ -337,9 +340,10 @@
     return;
   }
 
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Class> const element_class = soa.Decode<mirror::Class>(
       WellKnownClasses::dalvik_system_DexPathList__Element);
   ObjPtr<mirror::Class> const dexfile_class = soa.Decode<mirror::Class>(
@@ -377,7 +381,7 @@
   }
 }
 
-static bool AreSharedLibrariesOk(const std::string shared_libraries,
+static bool AreSharedLibrariesOk(const std::string& shared_libraries,
                                  std::priority_queue<DexFileAndClassPair>& queue) {
   if (shared_libraries.empty()) {
     if (queue.empty()) {
@@ -398,10 +402,14 @@
     while (!temp.empty() && index < shared_libraries_split.size() - 1) {
       DexFileAndClassPair pair(temp.top());
       const DexFile* dex_file = pair.GetDexFile();
-      std::string dex_filename(dex_file->GetLocation());
+      const std::string& dex_filename = dex_file->GetLocation();
+      if (dex_filename != shared_libraries_split[index]) {
+        break;
+      }
+      char* end;
+      size_t shared_lib_checksum = strtoul(shared_libraries_split[index + 1].c_str(), &end, 10);
       uint32_t dex_checksum = dex_file->GetLocationChecksum();
-      if (dex_filename != shared_libraries_split[index] ||
-          dex_checksum != std::stoul(shared_libraries_split[index + 1])) {
+      if (*end != '\0' || dex_checksum != shared_lib_checksum) {
         break;
       }
       temp.pop();
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index ee5002f..3cdde5a 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -58,7 +58,7 @@
   }
 
   bool IsOptimized() const {
-    return code_size_ != 0 && vmap_table_offset_ != 0;
+    return GetCodeSize() != 0 && vmap_table_offset_ != 0;
   }
 
   const void* GetOptimizedCodeInfoPtr() const {
@@ -67,6 +67,11 @@
     return data;
   }
 
+  uint8_t* GetOptimizedCodeInfoPtr() {
+    DCHECK(IsOptimized());
+    return code_ - vmap_table_offset_;
+  }
+
   CodeInfo GetOptimizedCodeInfo() const {
     return CodeInfo(GetOptimizedCodeInfoPtr());
   }
@@ -76,7 +81,23 @@
   }
 
   uint32_t GetCodeSize() const {
-    return code_size_;
+    return code_size_ & kCodeSizeMask;
+  }
+
+  const uint32_t* GetCodeSizeAddr() const {
+    return &code_size_;
+  }
+
+  uint32_t GetVmapTableOffset() const {
+    return vmap_table_offset_;
+  }
+
+  void SetVmapTableOffset(uint32_t offset) {
+    vmap_table_offset_ = offset;
+  }
+
+  const uint32_t* GetVmapTableOffsetAddr() const {
+    return &vmap_table_offset_;
   }
 
   const uint8_t* GetVmapTable() const {
@@ -91,7 +112,7 @@
       // On Thumb-2, the pc is offset by one.
       code_start++;
     }
-    return code_start <= pc && pc <= (code_start + code_size_);
+    return code_start <= pc && pc <= (code_start + GetCodeSize());
   }
 
   const uint8_t* GetEntryPoint() const {
@@ -125,11 +146,25 @@
 
   uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
 
+  void SetHasShouldDeoptimizeFlag() {
+    DCHECK_EQ(code_size_ & kShouldDeoptimizeMask, 0u);
+    code_size_ |= kShouldDeoptimizeMask;
+  }
+
+  bool HasShouldDeoptimizeFlag() const {
+    return (code_size_ & kShouldDeoptimizeMask) != 0;
+  }
+
+ private:
+  static constexpr uint32_t kShouldDeoptimizeMask = 0x80000000;
+  static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;
+
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
-  // The code size in bytes.
+  // The code size in bytes. The highest bit signifies whether the compiled
+  // code with this method header has the should_deoptimize flag.
   uint32_t code_size_;
   // The actual code.
   uint8_t code_[0];
diff --git a/runtime/obj_ptr.h b/runtime/obj_ptr.h
index 9318232..d24c6fb 100644
--- a/runtime/obj_ptr.h
+++ b/runtime/obj_ptr.h
@@ -20,6 +20,7 @@
 #include <ostream>
 #include <type_traits>
 
+#include "base/macros.h"
 #include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "globals.h"
 
@@ -41,17 +42,26 @@
  public:
   ALWAYS_INLINE ObjPtr() REQUIRES_SHARED(Locks::mutator_lock_) : reference_(0u) {}
 
-  ALWAYS_INLINE ObjPtr(std::nullptr_t) REQUIRES_SHARED(Locks::mutator_lock_) : reference_(0u) {}
+  // Note: The following constructors allow implicit conversion. This simplifies code that uses
+  //       them, e.g., for parameter passing. However, in general, implicit-conversion constructors
+  //       are discouraged and detected by cpplint and clang-tidy. So mark these constructors
+  //       as NOLINT (without category, as the categories are different).
+
+  ALWAYS_INLINE ObjPtr(std::nullptr_t)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(0u) {}
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr(Type* ptr) REQUIRES_SHARED(Locks::mutator_lock_)
+  ALWAYS_INLINE ObjPtr(Type* ptr)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
       : reference_(Encode(static_cast<MirrorType*>(ptr))) {
     static_assert(std::is_base_of<MirrorType, Type>::value,
                   "Input type must be a subtype of the ObjPtr type");
   }
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr(const ObjPtr<Type, kPoison>& other) REQUIRES_SHARED(Locks::mutator_lock_)
+  ALWAYS_INLINE ObjPtr(const ObjPtr<Type, kPoison>& other)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
       : reference_(Encode(static_cast<MirrorType*>(other.Ptr()))) {
     static_assert(std::is_base_of<MirrorType, Type>::value,
                   "Input type must be a subtype of the ObjPtr type");
@@ -154,6 +164,9 @@
   uintptr_t reference_;
 };
 
+static_assert(std::is_trivially_copyable<ObjPtr<void>>::value,
+              "ObjPtr should be trivially copyable");
+
 // Hash function for stl data structures.
 class HashObjPtr {
  public:
diff --git a/runtime/object_lock.cc b/runtime/object_lock.cc
index b8754a4..39ab52f 100644
--- a/runtime/object_lock.cc
+++ b/runtime/object_lock.cc
@@ -17,6 +17,7 @@
 #include "object_lock.h"
 
 #include "mirror/object-inl.h"
+#include "mirror/class_ext.h"
 #include "monitor.h"
 
 namespace art {
@@ -61,6 +62,7 @@
 }
 
 template class ObjectLock<mirror::Class>;
+template class ObjectLock<mirror::ClassExt>;
 template class ObjectLock<mirror::Object>;
 template class ObjectTryLock<mirror::Class>;
 template class ObjectTryLock<mirror::Object>;
diff --git a/runtime/openjdkjvmti/Android.bp b/runtime/openjdkjvmti/Android.bp
index 5095cfd..0f9fbb2 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/runtime/openjdkjvmti/Android.bp
@@ -18,12 +18,13 @@
     defaults: ["art_defaults"],
     host_supported: true,
     srcs: ["events.cc",
-           "heap.cc",
            "object_tagging.cc",
            "OpenjdkJvmTi.cc",
            "ti_class.cc",
+           "ti_heap.cc",
            "ti_method.cc",
            "ti_stack.cc",
+           "ti_redefine.cc",
            "transform.cc"],
     include_dirs: ["art/runtime"],
     shared_libs: [
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 50b50d6..d1c2293 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -39,16 +39,17 @@
 #include "art_jvmti.h"
 #include "base/mutex.h"
 #include "events-inl.h"
-#include "heap.h"
 #include "jni_env_ext-inl.h"
-#include "object_tagging.h"
 #include "obj_ptr-inl.h"
+#include "object_tagging.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread_list.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "ti_class.h"
+#include "ti_heap.h"
 #include "ti_method.h"
+#include "ti_redefine.h"
 #include "ti_stack.h"
 #include "transform.h"
 
@@ -61,20 +62,38 @@
 EventHandler gEventHandler;
 ObjectTagTable gObjectTagTable(&gEventHandler);
 
+#define ENSURE_NON_NULL(n)      \
+  do {                          \
+    if ((n) == nullptr) {       \
+      return ERR(NULL_POINTER); \
+    }                           \
+  } while (false)
+
 class JvmtiFunctions {
  private:
   static bool IsValidEnv(jvmtiEnv* env) {
     return env != nullptr;
   }
 
+#define ENSURE_VALID_ENV(env)          \
+  do {                                 \
+    if (!IsValidEnv(env)) {            \
+      return ERR(INVALID_ENVIRONMENT); \
+    }                                  \
+  } while (false)
+
+#define ENSURE_HAS_CAP(env, cap) \
+  do { \
+    ENSURE_VALID_ENV(env); \
+    if (ArtJvmTiEnv::AsArtJvmTiEnv(env)->capabilities.cap != 1) { \
+      return ERR(MUST_POSSESS_CAPABILITY); \
+    } \
+  } while (false)
+
  public:
   static jvmtiError Allocate(jvmtiEnv* env, jlong size, unsigned char** mem_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
-    if (mem_ptr == nullptr) {
-      return ERR(NULL_POINTER);
-    }
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(mem_ptr);
     if (size < 0) {
       return ERR(ILLEGAL_ARGUMENT);
     } else if (size == 0) {
@@ -86,9 +105,7 @@
   }
 
   static jvmtiError Deallocate(jvmtiEnv* env, unsigned char* mem) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     if (mem != nullptr) {
       free(mem);
     }
@@ -158,7 +175,7 @@
   static jvmtiError GetCurrentContendedMonitor(jvmtiEnv* env,
                                                jthread thread,
                                                jobject* monitor_ptr) {
-  return ERR(NOT_IMPLEMENTED);
+    return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError RunAgentThread(jvmtiEnv* env,
@@ -277,7 +294,13 @@
                                      jobject initial_object,
                                      const jvmtiHeapCallbacks* callbacks,
                                      const void* user_data) {
-    return ERR(NOT_IMPLEMENTED);
+    HeapUtil heap_util(&gObjectTagTable);
+    return heap_util.FollowReferences(env,
+                                      heap_filter,
+                                      klass,
+                                      initial_object,
+                                      callbacks,
+                                      user_data);
   }
 
   static jvmtiError IterateThroughHeap(jvmtiEnv* env,
@@ -285,14 +308,13 @@
                                        jclass klass,
                                        const jvmtiHeapCallbacks* callbacks,
                                        const void* user_data) {
+    ENSURE_HAS_CAP(env, can_tag_objects);
     HeapUtil heap_util(&gObjectTagTable);
     return heap_util.IterateThroughHeap(env, heap_filter, klass, callbacks, user_data);
   }
 
   static jvmtiError GetTag(jvmtiEnv* env, jobject object, jlong* tag_ptr) {
-    if (object == nullptr || tag_ptr == nullptr) {
-      return ERR(NULL_POINTER);
-    }
+    ENSURE_HAS_CAP(env, can_tag_objects);
 
     JNIEnv* jni_env = GetJniEnv(env);
     if (jni_env == nullptr) {
@@ -309,6 +331,8 @@
   }
 
   static jvmtiError SetTag(jvmtiEnv* env, jobject object, jlong tag) {
+    ENSURE_HAS_CAP(env, can_tag_objects);
+
     if (object == nullptr) {
       return ERR(NULL_POINTER);
     }
@@ -331,6 +355,8 @@
                                        jint* count_ptr,
                                        jobject** object_result_ptr,
                                        jlong** tag_result_ptr) {
+    ENSURE_HAS_CAP(env, can_tag_objects);
+
     JNIEnv* jni_env = GetJniEnv(env);
     if (jni_env == nullptr) {
       return ERR(INTERNAL);
@@ -346,7 +372,7 @@
   }
 
   static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
-    return ERR(NOT_IMPLEMENTED);
+    return HeapUtil::ForceGarbageCollection(env);
   }
 
   static jvmtiError IterateOverObjectsReachableFromObject(
@@ -759,9 +785,7 @@
   static jvmtiError SetEventCallbacks(jvmtiEnv* env,
                                       const jvmtiEventCallbacks* callbacks,
                                       jint size_of_callbacks) {
-    if (env == nullptr) {
-      return ERR(NULL_POINTER);
-    }
+    ENSURE_VALID_ENV(env);
     if (size_of_callbacks < 0) {
       return ERR(ILLEGAL_ARGUMENT);
     }
@@ -788,6 +812,8 @@
                                              jvmtiEvent event_type,
                                              jthread event_thread,
                                              ...) {
+    ENSURE_VALID_ENV(env);
+    // TODO: Check for capabilities.
     art::Thread* art_thread = nullptr;
     if (event_thread != nullptr) {
       // TODO: Need non-aborting call here, to return JVMTI_ERROR_INVALID_THREAD.
@@ -828,20 +854,136 @@
   }
 
   static jvmtiError GetPotentialCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    *capabilities_ptr = kPotentialCapabilities;
+    return OK;
   }
 
   static jvmtiError AddCapabilities(jvmtiEnv* env, const jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    ArtJvmTiEnv* art_env = static_cast<ArtJvmTiEnv*>(env);
+    jvmtiError ret = OK;
+#define ADD_CAPABILITY(e) \
+    do { \
+      if (capabilities_ptr->e == 1) { \
+        if (kPotentialCapabilities.e == 1) { \
+          art_env->capabilities.e = 1;\
+        } else { \
+          ret = ERR(NOT_AVAILABLE); \
+        } \
+      } \
+    } while (false)
+
+    ADD_CAPABILITY(can_tag_objects);
+    ADD_CAPABILITY(can_generate_field_modification_events);
+    ADD_CAPABILITY(can_generate_field_access_events);
+    ADD_CAPABILITY(can_get_bytecodes);
+    ADD_CAPABILITY(can_get_synthetic_attribute);
+    ADD_CAPABILITY(can_get_owned_monitor_info);
+    ADD_CAPABILITY(can_get_current_contended_monitor);
+    ADD_CAPABILITY(can_get_monitor_info);
+    ADD_CAPABILITY(can_pop_frame);
+    ADD_CAPABILITY(can_redefine_classes);
+    ADD_CAPABILITY(can_signal_thread);
+    ADD_CAPABILITY(can_get_source_file_name);
+    ADD_CAPABILITY(can_get_line_numbers);
+    ADD_CAPABILITY(can_get_source_debug_extension);
+    ADD_CAPABILITY(can_access_local_variables);
+    ADD_CAPABILITY(can_maintain_original_method_order);
+    ADD_CAPABILITY(can_generate_single_step_events);
+    ADD_CAPABILITY(can_generate_exception_events);
+    ADD_CAPABILITY(can_generate_frame_pop_events);
+    ADD_CAPABILITY(can_generate_breakpoint_events);
+    ADD_CAPABILITY(can_suspend);
+    ADD_CAPABILITY(can_redefine_any_class);
+    ADD_CAPABILITY(can_get_current_thread_cpu_time);
+    ADD_CAPABILITY(can_get_thread_cpu_time);
+    ADD_CAPABILITY(can_generate_method_entry_events);
+    ADD_CAPABILITY(can_generate_method_exit_events);
+    ADD_CAPABILITY(can_generate_all_class_hook_events);
+    ADD_CAPABILITY(can_generate_compiled_method_load_events);
+    ADD_CAPABILITY(can_generate_monitor_events);
+    ADD_CAPABILITY(can_generate_vm_object_alloc_events);
+    ADD_CAPABILITY(can_generate_native_method_bind_events);
+    ADD_CAPABILITY(can_generate_garbage_collection_events);
+    ADD_CAPABILITY(can_generate_object_free_events);
+    ADD_CAPABILITY(can_force_early_return);
+    ADD_CAPABILITY(can_get_owned_monitor_stack_depth_info);
+    ADD_CAPABILITY(can_get_constant_pool);
+    ADD_CAPABILITY(can_set_native_method_prefix);
+    ADD_CAPABILITY(can_retransform_classes);
+    ADD_CAPABILITY(can_retransform_any_class);
+    ADD_CAPABILITY(can_generate_resource_exhaustion_heap_events);
+    ADD_CAPABILITY(can_generate_resource_exhaustion_threads_events);
+#undef ADD_CAPABILITY
+    return ret;
   }
 
   static jvmtiError RelinquishCapabilities(jvmtiEnv* env,
                                            const jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    ArtJvmTiEnv* art_env = reinterpret_cast<ArtJvmTiEnv*>(env);
+#define DEL_CAPABILITY(e) \
+    do { \
+      if (capabilities_ptr->e == 1) { \
+        art_env->capabilities.e = 0;\
+      } \
+    } while (false)
+
+    DEL_CAPABILITY(can_tag_objects);
+    DEL_CAPABILITY(can_generate_field_modification_events);
+    DEL_CAPABILITY(can_generate_field_access_events);
+    DEL_CAPABILITY(can_get_bytecodes);
+    DEL_CAPABILITY(can_get_synthetic_attribute);
+    DEL_CAPABILITY(can_get_owned_monitor_info);
+    DEL_CAPABILITY(can_get_current_contended_monitor);
+    DEL_CAPABILITY(can_get_monitor_info);
+    DEL_CAPABILITY(can_pop_frame);
+    DEL_CAPABILITY(can_redefine_classes);
+    DEL_CAPABILITY(can_signal_thread);
+    DEL_CAPABILITY(can_get_source_file_name);
+    DEL_CAPABILITY(can_get_line_numbers);
+    DEL_CAPABILITY(can_get_source_debug_extension);
+    DEL_CAPABILITY(can_access_local_variables);
+    DEL_CAPABILITY(can_maintain_original_method_order);
+    DEL_CAPABILITY(can_generate_single_step_events);
+    DEL_CAPABILITY(can_generate_exception_events);
+    DEL_CAPABILITY(can_generate_frame_pop_events);
+    DEL_CAPABILITY(can_generate_breakpoint_events);
+    DEL_CAPABILITY(can_suspend);
+    DEL_CAPABILITY(can_redefine_any_class);
+    DEL_CAPABILITY(can_get_current_thread_cpu_time);
+    DEL_CAPABILITY(can_get_thread_cpu_time);
+    DEL_CAPABILITY(can_generate_method_entry_events);
+    DEL_CAPABILITY(can_generate_method_exit_events);
+    DEL_CAPABILITY(can_generate_all_class_hook_events);
+    DEL_CAPABILITY(can_generate_compiled_method_load_events);
+    DEL_CAPABILITY(can_generate_monitor_events);
+    DEL_CAPABILITY(can_generate_vm_object_alloc_events);
+    DEL_CAPABILITY(can_generate_native_method_bind_events);
+    DEL_CAPABILITY(can_generate_garbage_collection_events);
+    DEL_CAPABILITY(can_generate_object_free_events);
+    DEL_CAPABILITY(can_force_early_return);
+    DEL_CAPABILITY(can_get_owned_monitor_stack_depth_info);
+    DEL_CAPABILITY(can_get_constant_pool);
+    DEL_CAPABILITY(can_set_native_method_prefix);
+    DEL_CAPABILITY(can_retransform_classes);
+    DEL_CAPABILITY(can_retransform_any_class);
+    DEL_CAPABILITY(can_generate_resource_exhaustion_heap_events);
+    DEL_CAPABILITY(can_generate_resource_exhaustion_threads_events);
+#undef DEL_CAPABILITY
+    return OK;
   }
 
   static jvmtiError GetCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    ArtJvmTiEnv* artenv = reinterpret_cast<ArtJvmTiEnv*>(env);
+    *capabilities_ptr = artenv->capabilities;
+    return OK;
   }
 
   static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
@@ -897,44 +1039,31 @@
   }
 
   static jvmtiError DisposeEnvironment(jvmtiEnv* env) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     delete env;
     return OK;
   }
 
   static jvmtiError SetEnvironmentLocalStorage(jvmtiEnv* env, const void* data) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     reinterpret_cast<ArtJvmTiEnv*>(env)->local_data = const_cast<void*>(data);
     return OK;
   }
 
   static jvmtiError GetEnvironmentLocalStorage(jvmtiEnv* env, void** data_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     *data_ptr = reinterpret_cast<ArtJvmTiEnv*>(env)->local_data;
     return OK;
   }
 
   static jvmtiError GetVersionNumber(jvmtiEnv* env, jint* version_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     *version_ptr = JVMTI_VERSION;
     return OK;
   }
 
   static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error,  char** name_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
-    if (name_ptr == nullptr) {
-      return ERR(NULL_POINTER);
-    }
+    ENSURE_NON_NULL(name_ptr);
     switch (error) {
 #define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \
           *name_ptr = const_cast<char*>("JVMTI_ERROR_"#e); \
@@ -1020,6 +1149,8 @@
     if (!IsValidEnv(env)) {
       return ERR(INVALID_ENVIRONMENT);
     }
+    jvmtiError res = OK;
+    std::string error;
     for (jclass klass : classes) {
       JNIEnv* jni_env = nullptr;
       jobject loader = nullptr;
@@ -1055,11 +1186,22 @@
            /*out*/&new_dex_data);
       // Check if anything actually changed.
       if ((new_data_len != 0 || new_dex_data != nullptr) && new_dex_data != dex_data) {
-        MoveTransformedFileIntoRuntime(klass, std::move(location), new_data_len, new_dex_data);
+        res = Redefiner::RedefineClass(env,
+                                       art::Runtime::Current(),
+                                       art::Thread::Current(),
+                                       klass,
+                                       location,
+                                       new_data_len,
+                                       new_dex_data,
+                                       &error);
         env->Deallocate(new_dex_data);
       }
       // Deallocate the old dex data.
       env->Deallocate(dex_data);
+      if (res != OK) {
+        LOG(ERROR) << "FAILURE TO REDEFINE " << error;
+        return res;
+      }
     }
     return OK;
   }
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index a321124..48b29a3 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -52,11 +52,13 @@
 struct ArtJvmTiEnv : public jvmtiEnv {
   art::JavaVMExt* art_vm;
   void* local_data;
+  jvmtiCapabilities capabilities;
 
   EventMasks event_masks;
   std::unique_ptr<jvmtiEventCallbacks> event_callbacks;
 
-  explicit ArtJvmTiEnv(art::JavaVMExt* runtime) : art_vm(runtime), local_data(nullptr) {
+  explicit ArtJvmTiEnv(art::JavaVMExt* runtime)
+      : art_vm(runtime), local_data(nullptr), capabilities() {
     functions = &gJvmtiInterface;
   }
 
@@ -121,6 +123,50 @@
   return ret;
 }
 
+const jvmtiCapabilities kPotentialCapabilities = {  // Only can_tag_objects is set to 1.
+    .can_tag_objects                                 = 1,
+    .can_generate_field_modification_events          = 0,
+    .can_generate_field_access_events                = 0,
+    .can_get_bytecodes                               = 0,
+    .can_get_synthetic_attribute                     = 0,
+    .can_get_owned_monitor_info                      = 0,
+    .can_get_current_contended_monitor               = 0,
+    .can_get_monitor_info                            = 0,
+    .can_pop_frame                                   = 0,
+    .can_redefine_classes                            = 0,
+    .can_signal_thread                               = 0,
+    .can_get_source_file_name                        = 0,
+    .can_get_line_numbers                            = 0,
+    .can_get_source_debug_extension                  = 0,
+    .can_access_local_variables                      = 0,
+    .can_maintain_original_method_order              = 0,
+    .can_generate_single_step_events                 = 0,
+    .can_generate_exception_events                   = 0,
+    .can_generate_frame_pop_events                   = 0,
+    .can_generate_breakpoint_events                  = 0,
+    .can_suspend                                     = 0,
+    .can_redefine_any_class                          = 0,
+    .can_get_current_thread_cpu_time                 = 0,
+    .can_get_thread_cpu_time                         = 0,
+    .can_generate_method_entry_events                = 0,
+    .can_generate_method_exit_events                 = 0,
+    .can_generate_all_class_hook_events              = 0,
+    .can_generate_compiled_method_load_events        = 0,
+    .can_generate_monitor_events                     = 0,
+    .can_generate_vm_object_alloc_events             = 0,
+    .can_generate_native_method_bind_events          = 0,
+    .can_generate_garbage_collection_events          = 0,
+    .can_generate_object_free_events                 = 0,
+    .can_force_early_return                          = 0,
+    .can_get_owned_monitor_stack_depth_info          = 0,
+    .can_get_constant_pool                           = 0,
+    .can_set_native_method_prefix                    = 0,
+    .can_retransform_classes                         = 0,
+    .can_retransform_any_class                       = 0,
+    .can_generate_resource_exhaustion_heap_events    = 0,
+    .can_generate_resource_exhaustion_threads_events = 0,
+};
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
diff --git a/runtime/openjdkjvmti/heap.cc b/runtime/openjdkjvmti/heap.cc
deleted file mode 100644
index 1799e19..0000000
--- a/runtime/openjdkjvmti/heap.cc
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "heap.h"
-
-#include "art_jvmti.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "class_linker.h"
-#include "gc/heap.h"
-#include "jni_env_ext.h"
-#include "mirror/class.h"
-#include "object_callbacks.h"
-#include "object_tagging.h"
-#include "obj_ptr-inl.h"
-#include "runtime.h"
-#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
-
-namespace openjdkjvmti {
-
-struct IterateThroughHeapData {
-  IterateThroughHeapData(HeapUtil* _heap_util,
-                         jint heap_filter,
-                         art::ObjPtr<art::mirror::Class> klass,
-                         const jvmtiHeapCallbacks* _callbacks,
-                         const void* _user_data)
-      : heap_util(_heap_util),
-        filter_klass(klass),
-        callbacks(_callbacks),
-        user_data(_user_data),
-        filter_out_tagged((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0),
-        filter_out_untagged((heap_filter & JVMTI_HEAP_FILTER_UNTAGGED) != 0),
-        filter_out_class_tagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_TAGGED) != 0),
-        filter_out_class_untagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_UNTAGGED) != 0),
-        any_filter(filter_out_tagged ||
-                   filter_out_untagged ||
-                   filter_out_class_tagged ||
-                   filter_out_class_untagged),
-        stop_reports(false) {
-  }
-
-  bool ShouldReportByHeapFilter(jlong tag, jlong class_tag) {
-    if (!any_filter) {
-      return true;
-    }
-
-    if ((tag == 0 && filter_out_untagged) || (tag != 0 && filter_out_tagged)) {
-      return false;
-    }
-
-    if ((class_tag == 0 && filter_out_class_untagged) ||
-        (class_tag != 0 && filter_out_class_tagged)) {
-      return false;
-    }
-
-    return true;
-  }
-
-  HeapUtil* heap_util;
-  art::ObjPtr<art::mirror::Class> filter_klass;
-  const jvmtiHeapCallbacks* callbacks;
-  const void* user_data;
-  const bool filter_out_tagged;
-  const bool filter_out_untagged;
-  const bool filter_out_class_tagged;
-  const bool filter_out_class_untagged;
-  const bool any_filter;
-
-  bool stop_reports;
-};
-
-static void IterateThroughHeapObjectCallback(art::mirror::Object* obj, void* arg)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg);
-  // Early return, as we can't really stop visiting.
-  if (ithd->stop_reports) {
-    return;
-  }
-
-  art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback");
-
-  jlong tag = 0;
-  ithd->heap_util->GetTags()->GetTag(obj, &tag);
-
-  jlong class_tag = 0;
-  art::ObjPtr<art::mirror::Class> klass = obj->GetClass();
-  ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag);
-  // For simplicity, even if we find a tag = 0, assume 0 = not tagged.
-
-  if (!ithd->ShouldReportByHeapFilter(tag, class_tag)) {
-    return;
-  }
-
-  // TODO: Handle array_primitive_value_callback.
-
-  if (ithd->filter_klass != nullptr) {
-    if (ithd->filter_klass != klass) {
-      return;
-    }
-  }
-
-  jlong size = obj->SizeOf();
-
-  jint length = -1;
-  if (obj->IsArrayInstance()) {
-    length = obj->AsArray()->GetLength();
-  }
-
-  jlong saved_tag = tag;
-  jint ret = ithd->callbacks->heap_iteration_callback(class_tag,
-                                                      size,
-                                                      &tag,
-                                                      length,
-                                                      const_cast<void*>(ithd->user_data));
-
-  if (tag != saved_tag) {
-    ithd->heap_util->GetTags()->Set(obj, tag);
-  }
-
-  ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0;
-
-  // TODO Implement array primitive and string primitive callback.
-  // TODO Implement primitive field callback.
-}
-
-jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env ATTRIBUTE_UNUSED,
-                                        jint heap_filter,
-                                        jclass klass,
-                                        const jvmtiHeapCallbacks* callbacks,
-                                        const void* user_data) {
-  if (callbacks == nullptr) {
-    return ERR(NULL_POINTER);
-  }
-
-  if (callbacks->array_primitive_value_callback != nullptr) {
-    // TODO: Implement.
-    return ERR(NOT_IMPLEMENTED);
-  }
-
-  art::Thread* self = art::Thread::Current();
-  art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
-
-  IterateThroughHeapData ithd(this,
-                              heap_filter,
-                              soa.Decode<art::mirror::Class>(klass),
-                              callbacks,
-                              user_data);
-
-  art::Runtime::Current()->GetHeap()->VisitObjects(IterateThroughHeapObjectCallback, &ithd);
-
-  return ERR(NONE);
-}
-
-jvmtiError HeapUtil::GetLoadedClasses(jvmtiEnv* env,
-                                      jint* class_count_ptr,
-                                      jclass** classes_ptr) {
-  if (class_count_ptr == nullptr || classes_ptr == nullptr) {
-    return ERR(NULL_POINTER);
-  }
-
-  class ReportClassVisitor : public art::ClassVisitor {
-   public:
-    explicit ReportClassVisitor(art::Thread* self) : self_(self) {}
-
-    bool operator()(art::ObjPtr<art::mirror::Class> klass)
-        OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
-      classes_.push_back(self_->GetJniEnv()->AddLocalReference<jclass>(klass));
-      return true;
-    }
-
-    art::Thread* self_;
-    std::vector<jclass> classes_;
-  };
-
-  art::Thread* self = art::Thread::Current();
-  ReportClassVisitor rcv(self);
-  {
-    art::ScopedObjectAccess soa(self);
-    art::Runtime::Current()->GetClassLinker()->VisitClasses(&rcv);
-  }
-
-  size_t size = rcv.classes_.size();
-  jclass* classes = nullptr;
-  jvmtiError alloc_ret = env->Allocate(static_cast<jlong>(size * sizeof(jclass)),
-                                       reinterpret_cast<unsigned char**>(&classes));
-  if (alloc_ret != ERR(NONE)) {
-    return alloc_ret;
-  }
-
-  for (size_t i = 0; i < size; ++i) {
-    classes[i] = rcv.classes_[i];
-  }
-  *classes_ptr = classes;
-  *class_count_ptr = static_cast<jint>(size);
-
-  return ERR(NONE);
-}
-
-}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/runtime/openjdkjvmti/object_tagging.cc
index b359f36..b983e79 100644
--- a/runtime/openjdkjvmti/object_tagging.cc
+++ b/runtime/openjdkjvmti/object_tagging.cc
@@ -47,6 +47,16 @@
 
 namespace openjdkjvmti {
 
+void ObjectTagTable::Lock() {  // Exclusively locks allow_disallow_lock_ for the current thread.
+  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
+}
+void ObjectTagTable::Unlock() {  // Exclusively unlocks allow_disallow_lock_ again.
+  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
+}
+void ObjectTagTable::AssertLocked() {  // Asserts the current thread holds allow_disallow_lock_.
+  allow_disallow_lock_.AssertHeld(art::Thread::Current());
+}
+
 void ObjectTagTable::UpdateTableWithReadBarrier() {
   update_since_last_sweep_ = true;
 
@@ -80,6 +90,13 @@
 
   return RemoveLocked(self, obj, tag);
 }
+bool ObjectTagTable::RemoveLocked(art::mirror::Object* obj, jlong* tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);  // Unlike Remove(), the caller must already hold the lock.
+  Wait(self);
+  // Delegate to the overload that takes the current thread explicitly.
+  return RemoveLocked(self, obj, tag);
+}
 
 bool ObjectTagTable::RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag) {
   auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
@@ -109,12 +126,29 @@
 }
 
 bool ObjectTagTable::Set(art::mirror::Object* obj, jlong new_tag) {
+  if (new_tag == 0) {  // A zero tag is handled as removal of any existing entry.
+    jlong tmp;  // Out-parameter required by Remove(); its value is discarded.
+    return Remove(obj, &tmp);
+  }
+
   art::Thread* self = art::Thread::Current();
   art::MutexLock mu(self, allow_disallow_lock_);
   Wait(self);
 
   return SetLocked(self, obj, new_tag);
 }
+bool ObjectTagTable::SetLocked(art::mirror::Object* obj, jlong new_tag) {
+  if (new_tag == 0) {  // A zero tag is handled as removal of any existing entry.
+    jlong tmp;  // Out-parameter required by RemoveLocked(); its value is discarded.
+    return RemoveLocked(obj, &tmp);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);  // Unlike Set(), the caller must already hold the lock.
+  Wait(self);
+  // Delegate to the overload that takes the current thread explicitly.
+  return SetLocked(self, obj, new_tag);
+}
 
 bool ObjectTagTable::SetLocked(art::Thread* self, art::mirror::Object* obj, jlong new_tag) {
   auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
diff --git a/runtime/openjdkjvmti/object_tagging.h b/runtime/openjdkjvmti/object_tagging.h
index 071d139..0296f1a 100644
--- a/runtime/openjdkjvmti/object_tagging.h
+++ b/runtime/openjdkjvmti/object_tagging.h
@@ -34,7 +34,7 @@
 class ObjectTagTable : public art::gc::SystemWeakHolder {
  public:
   explicit ObjectTagTable(EventHandler* event_handler)
-      : art::gc::SystemWeakHolder(art::LockLevel::kAllocTrackerLock),
+      : art::gc::SystemWeakHolder(kTaggingLockLevel),  // See kTaggingLockLevel below.
         update_since_last_sweep_(false),
         event_handler_(event_handler) {
   }
@@ -46,10 +46,16 @@
   bool Remove(art::mirror::Object* obj, jlong* tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
+  bool RemoveLocked(art::mirror::Object* obj, jlong* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
 
   bool Set(art::mirror::Object* obj, jlong tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
+  bool SetLocked(art::mirror::Object* obj, jlong tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
 
   bool GetTag(art::mirror::Object* obj, jlong* result)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
@@ -60,6 +66,30 @@
 
     return GetTagLocked(self, obj, result);
   }
+  bool GetTagLocked(art::mirror::Object* obj, jlong* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    allow_disallow_lock_.AssertHeld(self);  // The caller must already hold the lock.
+    Wait(self);
+    // Delegate to the overload that takes the current thread explicitly.
+    return GetTagLocked(self, obj, result);
+  }
+
+  jlong GetTagOrZero(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_) {
+    jlong tmp = 0;  // Returned unchanged unless GetTag() overwrites it.
+    GetTag(obj, &tmp);  // The bool result is deliberately ignored.
+    return tmp;
+  }
+  jlong GetTagOrZeroLocked(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    jlong tmp = 0;  // Returned unchanged unless GetTagLocked() overwrites it.
+    GetTagLocked(obj, &tmp);  // The bool result is deliberately ignored.
+    return tmp;
+  }
 
   void Sweep(art::IsMarkedVisitor* visitor)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
@@ -74,6 +104,10 @@
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
 
+  void Lock() ACQUIRE(allow_disallow_lock_);
+  void Unlock() RELEASE(allow_disallow_lock_);
+  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+
  private:
   bool SetLocked(art::Thread* self, art::mirror::Object* obj, jlong tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
@@ -146,6 +180,10 @@
     }
   };
 
+  // The tag table is used while visiting roots, so its lock needs a low lock level.
+  static constexpr art::LockLevel kTaggingLockLevel =
+      static_cast<art::LockLevel>(art::LockLevel::kAbortLock + 1);
+
   std::unordered_map<art::GcRoot<art::mirror::Object>,
                      jlong,
                      HashGcRoot,
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
new file mode 100644
index 0000000..5e588a8
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -0,0 +1,717 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ti_heap.h"
+
+#include "art_field-inl.h"
+#include "art_jvmti.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "class_linker.h"
+#include "gc/heap.h"
+#include "gc_root-inl.h"
+#include "jni_env_ext.h"
+#include "jni_internal.h"
+#include "mirror/class.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "object_callbacks.h"
+#include "object_tagging.h"
+#include "obj_ptr-inl.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+namespace openjdkjvmti {
+
+struct IterateThroughHeapData {  // State handed to IterateThroughHeapObjectCallback via void*.
+  IterateThroughHeapData(HeapUtil* _heap_util,
+                         jint heap_filter,
+                         art::ObjPtr<art::mirror::Class> klass,
+                         const jvmtiHeapCallbacks* _callbacks,
+                         const void* _user_data)
+      : heap_util(_heap_util),
+        filter_klass(klass),
+        callbacks(_callbacks),
+        user_data(_user_data),
+        filter_out_tagged((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0),
+        filter_out_untagged((heap_filter & JVMTI_HEAP_FILTER_UNTAGGED) != 0),
+        filter_out_class_tagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_TAGGED) != 0),
+        filter_out_class_untagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_UNTAGGED) != 0),
+        any_filter(filter_out_tagged ||
+                   filter_out_untagged ||
+                   filter_out_class_tagged ||
+                   filter_out_class_untagged),
+        stop_reports(false) {
+  }
+
+  bool ShouldReportByHeapFilter(jlong tag, jlong class_tag) {
+    if (!any_filter) {  // Fast path: no filter bit was requested.
+      return true;
+    }
+    // Filter on the object's own tag; a tag of 0 counts as untagged.
+    if ((tag == 0 && filter_out_untagged) || (tag != 0 && filter_out_tagged)) {
+      return false;
+    }
+    // The same rule applied to the tag of the object's class.
+    if ((class_tag == 0 && filter_out_class_untagged) ||
+        (class_tag != 0 && filter_out_class_tagged)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  HeapUtil* heap_util;
+  art::ObjPtr<art::mirror::Class> filter_klass;  // Compared against each object's class if non-null.
+  const jvmtiHeapCallbacks* callbacks;
+  const void* user_data;
+  const bool filter_out_tagged;
+  const bool filter_out_untagged;
+  const bool filter_out_class_tagged;
+  const bool filter_out_class_untagged;
+  const bool any_filter;  // True if at least one of the four filter_out_* flags is set.
+
+  bool stop_reports;  // Set once the heap iteration callback returns JVMTI_VISIT_ABORT.
+};
+
+static void IterateThroughHeapObjectCallback(art::mirror::Object* obj, void* arg)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg);
+  // The heap visit itself cannot be cut short, so after an abort every further object is skipped.
+  if (ithd->stop_reports) {
+    return;
+  }
+
+  art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback");
+
+  jlong tag = 0;
+  ithd->heap_util->GetTags()->GetTag(obj, &tag);
+
+  jlong class_tag = 0;
+  art::ObjPtr<art::mirror::Class> klass = obj->GetClass();
+  ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag);
+  // For simplicity, a tag value of 0 is always treated as "not tagged".
+
+  if (!ithd->ShouldReportByHeapFilter(tag, class_tag)) {
+    return;
+  }
+
+  // TODO: Handle array_primitive_value_callback.
+
+  if (ithd->filter_klass != nullptr) {  // Optional restriction to exactly one class.
+    if (ithd->filter_klass != klass) {
+      return;
+    }
+  }
+
+  jlong size = obj->SizeOf();
+
+  jint length = -1;  // Stays -1 for objects that are not arrays.
+  if (obj->IsArrayInstance()) {
+    length = obj->AsArray()->GetLength();
+  }
+  // NOTE(review): heap_iteration_callback is invoked without a null check.
+  jlong saved_tag = tag;  // Lets us detect whether the callback changed the tag.
+  jint ret = ithd->callbacks->heap_iteration_callback(class_tag,
+                                                      size,
+                                                      &tag,
+                                                      length,
+                                                      const_cast<void*>(ithd->user_data));
+
+  if (tag != saved_tag) {  // The callback rewrote the tag through the pointer; store it.
+    ithd->heap_util->GetTags()->Set(obj, tag);
+  }
+
+  ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0;
+
+  // TODO: Implement array primitive and string primitive callback.
+  // TODO: Implement primitive field callback.
+}
+
+jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                        jint heap_filter,
+                                        jclass klass,
+                                        const jvmtiHeapCallbacks* callbacks,
+                                        const void* user_data) {
+  if (callbacks == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  if (callbacks->array_primitive_value_callback != nullptr) {
+    // TODO: Implement; requests that set this callback are rejected for now.
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
+  // Bundle the filter bits, the decoded class and the callbacks for the per-object callback.
+  IterateThroughHeapData ithd(this,
+                              heap_filter,
+                              soa.Decode<art::mirror::Class>(klass),
+                              callbacks,
+                              user_data);
+  // Filtering and reporting happen per object in IterateThroughHeapObjectCallback.
+  art::Runtime::Current()->GetHeap()->VisitObjects(IterateThroughHeapObjectCallback, &ithd);
+
+  return ERR(NONE);
+}
+
+class FollowReferencesHelper FINAL {
+ public:
+  FollowReferencesHelper(HeapUtil* h,  // Only used to obtain the tag table.
+                         art::ObjPtr<art::mirror::Object> initial_object ATTRIBUTE_UNUSED,
+                         const jvmtiHeapCallbacks* callbacks,
+                         const void* user_data)
+      : tag_table_(h->GetTags()),
+        callbacks_(callbacks),
+        user_data_(user_data),
+        start_(0),  // Index of the next worklist_ entry that Work() will process.
+        stop_reports_(false) {
+  }
+
+  void Init()  // Visits runtime and image roots with a CollectAndReportRootsVisitor.
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    CollectAndReportRootsVisitor carrv(this, tag_table_, &worklist_, &visited_);
+    art::Runtime::Current()->VisitRoots(&carrv);
+    art::Runtime::Current()->VisitImageRoots(&carrv);
+    stop_reports_ = carrv.IsStopReports();
+    // After an abort nothing may be followed any more, so drop whatever was queued.
+    if (stop_reports_) {
+      worklist_.clear();
+    }
+  }
+
+  void Work()
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    // Currently implemented as a BFS. To lower overhead, we don't erase elements immediately
+    // from the head of the work list, instead postponing until there's a gap that's "large."
+    //
+    // Alternatively, we can implement a DFS and use the work list as a stack.
+    while (start_ < worklist_.size()) {
+      art::mirror::Object* cur_obj = worklist_[start_];
+      start_++;
+      // Drop the consumed prefix once it has grown to kMaxStart entries.
+      if (start_ >= kMaxStart) {
+        worklist_.erase(worklist_.begin(), worklist_.begin() + start_);
+        start_ = 0;
+      }
+
+      VisitObject(cur_obj);
+
+      if (stop_reports_) {  // Set by the Visit* helpers when a callback requests an abort.
+        break;
+      }
+    }
+  }
+
+ private:
+  class CollectAndReportRootsVisitor FINAL : public art::RootVisitor {  // Reports and queues roots.
+   public:
+    CollectAndReportRootsVisitor(FollowReferencesHelper* helper,
+                                 ObjectTagTable* tag_table,
+                                 std::vector<art::mirror::Object*>* worklist,
+                                 std::unordered_set<art::mirror::Object*>* visited)
+        : helper_(helper),
+          tag_table_(tag_table),
+          worklist_(worklist),
+          visited_(visited),
+          stop_reports_(false) {}
+
+    void VisitRoots(art::mirror::Object*** roots, size_t count, const art::RootInfo& info)
+        OVERRIDE
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*helper_->tag_table_->GetAllowDisallowLock()) {
+      for (size_t i = 0; i != count; ++i) {
+        AddRoot(*roots[i], info);
+      }
+    }
+
+    void VisitRoots(art::mirror::CompressedReference<art::mirror::Object>** roots,
+                    size_t count,
+                    const art::RootInfo& info)
+        OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*helper_->tag_table_->GetAllowDisallowLock()) {
+      for (size_t i = 0; i != count; ++i) {
+        AddRoot(roots[i]->AsMirrorPtr(), info);
+      }
+    }
+
+    bool IsStopReports() {  // True once a root callback has returned JVMTI_VISIT_ABORT.
+      return stop_reports_;
+    }
+
+   private:
+    void AddRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      if (stop_reports_) {  // The agent asked to abort: issue no further root callbacks.
+        return;
+      }
+      // visited_ doubles as the set of queued roots; insert() tells us whether the root is new.
+      if (visited_->insert(root_obj).second) {
+        worklist_->push_back(root_obj);
+      }
+      ReportRoot(root_obj, info);
+    }
+
+    // Remove NO_THREAD_SAFETY_ANALYSIS once ASSERT_CAPABILITY works correctly.
+    art::Thread* FindThread(const art::RootInfo& info) NO_THREAD_SAFETY_ANALYSIS {
+      art::Locks::thread_list_lock_->AssertExclusiveHeld(art::Thread::Current());
+      return art::Runtime::Current()->GetThreadList()->FindThreadByThreadId(info.GetThreadId());
+    }
+
+    jvmtiHeapReferenceKind GetReferenceKind(const art::RootInfo& info,
+                                            jvmtiHeapReferenceInfo* ref_info)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      // TODO: Fill in ref_info. For now it is zeroed; only the JNI-local case adds details.
+      memset(ref_info, 0, sizeof(jvmtiHeapReferenceInfo));
+
+      switch (info.GetType()) {
+        case art::RootType::kRootJNIGlobal:
+          return JVMTI_HEAP_REFERENCE_JNI_GLOBAL;
+
+        case art::RootType::kRootJNILocal:
+        {
+          uint32_t thread_id = info.GetThreadId();
+          ref_info->jni_local.thread_id = thread_id;
+
+          art::Thread* thread = FindThread(info);
+          if (thread != nullptr) {
+            art::mirror::Object* thread_obj = nullptr;
+            if (!thread->IsStillStarting()) {  // Only ask for the peer once startup is done.
+              thread_obj = thread->GetPeer();
+            }
+            if (thread_obj != nullptr) {
+              ref_info->jni_local.thread_tag = tag_table_->GetTagOrZero(thread_obj);
+            }
+          }
+
+          // TODO: The frame depth is not available; report 0 and the thread's current method.
+          if (thread != nullptr) {
+            ref_info->jni_local.depth = 0;
+            art::ArtMethod* method = thread->GetCurrentMethod(nullptr, false /* abort_on_error */);
+            if (method != nullptr) {
+              ref_info->jni_local.method = art::jni::EncodeArtMethod(method);
+            }
+          }
+
+          return JVMTI_HEAP_REFERENCE_JNI_LOCAL;
+        }
+
+        case art::RootType::kRootJavaFrame:
+          return JVMTI_HEAP_REFERENCE_STACK_LOCAL;
+
+        case art::RootType::kRootNativeStack:
+        case art::RootType::kRootThreadBlock:
+        case art::RootType::kRootThreadObject:
+          return JVMTI_HEAP_REFERENCE_THREAD;
+
+        case art::RootType::kRootStickyClass:
+        case art::RootType::kRootInternedString:
+          // Note: this isn't a root in the RI.
+          return JVMTI_HEAP_REFERENCE_SYSTEM_CLASS;
+
+        case art::RootType::kRootMonitorUsed:
+        case art::RootType::kRootJNIMonitor:
+          return JVMTI_HEAP_REFERENCE_MONITOR;
+
+        case art::RootType::kRootFinalizing:
+        case art::RootType::kRootDebugger:
+        case art::RootType::kRootReferenceCleanup:
+        case art::RootType::kRootVMInternal:
+        case art::RootType::kRootUnknown:
+          return JVMTI_HEAP_REFERENCE_OTHER;
+      }
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+    }
+
+    void ReportRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      jvmtiHeapReferenceInfo ref_info;  // Zeroed and filled in by GetReferenceKind().
+      jvmtiHeapReferenceKind kind = GetReferenceKind(info, &ref_info);
+      jint result = helper_->ReportReference(kind, &ref_info, nullptr, root_obj);
+      if ((result & JVMTI_VISIT_ABORT) != 0) {
+        stop_reports_ = true;
+      }
+    }
+
+   private:
+    FollowReferencesHelper* helper_;
+    ObjectTagTable* tag_table_;
+    std::vector<art::mirror::Object*>* worklist_;
+    std::unordered_set<art::mirror::Object*>* visited_;
+    bool stop_reports_;  // Set once a root callback returns JVMTI_VISIT_ABORT.
+  };
+
+  void VisitObject(art::mirror::Object* obj)  // Dispatches on class / array / plain instance.
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    if (obj->IsClass()) {
+      VisitClass(obj->AsClass());
+      return;
+    }
+    if (obj->IsArrayInstance()) {
+      VisitArray(obj);
+      return;
+    }
+
+    // TODO: We'll probably have to rewrite this completely with our own visiting logic, if we
+    //       want to have a chance of getting the field indices computed halfway efficiently. For
+    //       now, ignore them altogether.
+
+    struct InstanceReferenceVisitor {
+      explicit InstanceReferenceVisitor(FollowReferencesHelper* helper_)
+          : helper(helper_), stop_reports(false) {}
+
+      void operator()(art::mirror::Object* src,
+                      art::MemberOffset field_offset,
+                      bool is_static ATTRIBUTE_UNUSED) const
+          REQUIRES_SHARED(art::Locks::mutator_lock_)
+          REQUIRES(!*helper->tag_table_->GetAllowDisallowLock()) {
+        if (stop_reports) {  // An earlier field already triggered an abort.
+          return;
+        }
+
+        art::mirror::Object* trg = src->GetFieldObjectReferenceAddr(field_offset)->AsMirrorPtr();
+        jvmtiHeapReferenceInfo reference_info;
+        memset(&reference_info, 0, sizeof(reference_info));
+
+        // TODO: Implement spec-compliant numbering. Until then the raw field offset is reported.
+        reference_info.field.index = field_offset.Int32Value();
+
+        jvmtiHeapReferenceKind kind =
+            field_offset.Int32Value() == art::mirror::Object::ClassOffset().Int32Value()
+                ? JVMTI_HEAP_REFERENCE_CLASS
+                : JVMTI_HEAP_REFERENCE_FIELD;
+        const jvmtiHeapReferenceInfo* reference_info_ptr =
+            kind == JVMTI_HEAP_REFERENCE_CLASS ? nullptr : &reference_info;
+        // NOTE(review): trg may be null here; confirm ReportReferenceMaybeEnqueue tolerates it.
+        stop_reports = !helper->ReportReferenceMaybeEnqueue(kind, reference_info_ptr, src, trg);
+      }
+
+      void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED)
+          const {
+        LOG(FATAL) << "Unreachable";
+      }
+      void VisitRootIfNonNull(
+          art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED) const {
+        LOG(FATAL) << "Unreachable";
+      }
+
+      // "mutable" because the visitor API only invokes const member functions.
+      mutable FollowReferencesHelper* helper;
+      mutable bool stop_reports;
+    };
+
+    InstanceReferenceVisitor visitor(this);
+    // Visit references, not native roots.
+    obj->VisitReferences<false>(visitor, art::VoidFunctor());
+
+    stop_reports_ = visitor.stop_reports;  // Propagate an abort seen by the visitor.
+  }
+
+  void VisitArray(art::mirror::Object* array)  // Reports the array's class, then its elements.
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_CLASS,
+                                                 nullptr,
+                                                 array,
+                                                 array->GetClass());
+    if (stop_reports_) {
+      return;
+    }
+    // Only object arrays are walked; each non-null element is reported with its index.
+    if (array->IsObjectArray()) {
+      art::mirror::ObjectArray<art::mirror::Object>* obj_array =
+          array->AsObjectArray<art::mirror::Object>();
+      int32_t length = obj_array->GetLength();
+      for (int32_t i = 0; i != length; ++i) {
+        art::mirror::Object* elem = obj_array->GetWithoutChecks(i);
+        if (elem != nullptr) {
+          jvmtiHeapReferenceInfo reference_info;  // NOTE(review): only array.index is set below.
+          reference_info.array.index = i;
+          stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT,
+                                                       &reference_info,
+                                                       array,
+                                                       elem);
+          if (stop_reports_) {
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Reports a class's superclass, direct interfaces, class loader and static reference fields.
+  void VisitClass(art::mirror::Class* klass)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    // TODO: Are erroneous classes reported? Are non-prepared ones? For now, just use resolved ones.
+    if (!klass->IsResolved()) {
+      return;
+    }
+
+    // Superclass. ReportReference() skips a null target, so a missing superclass is harmless.
+    stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_SUPERCLASS,
+                                                 nullptr,
+                                                 klass,
+                                                 klass->GetSuperClass());
+    if (stop_reports_) {
+      return;
+    }
+
+    // Directly implemented or extended interfaces.
+    art::Thread* self = art::Thread::Current();
+    art::StackHandleScope<1> hs(self);
+    art::Handle<art::mirror::Class> h_klass(hs.NewHandle<art::mirror::Class>(klass));
+    for (size_t i = 0; i < h_klass->NumDirectInterfaces(); ++i) {
+      art::ObjPtr<art::mirror::Class> inf_klass =
+          art::mirror::Class::GetDirectInterface(self, h_klass, i);
+      if (inf_klass == nullptr) {
+        // TODO: With a resolved class this should not happen...
+        self->ClearException();
+        break;
+      }
+
+      stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_INTERFACE,
+                                                   nullptr,
+                                                   klass,
+                                                   inf_klass.Ptr());
+      if (stop_reports_) {
+        return;
+      }
+    }
+
+    // Classloader.
+    // TODO: What about the boot classpath loader? We'll skip for now, but do we have to find the
+    //       fake BootClassLoader?
+    if (klass->GetClassLoader() != nullptr) {
+      stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_CLASS_LOADER,
+                                                   nullptr,
+                                                   klass,
+                                                   klass->GetClassLoader());
+      if (stop_reports_) {
+        return;
+      }
+    }
+    DCHECK_EQ(h_klass.Get(), klass);
+
+    // Declared static fields; only non-null, non-primitive fields are reported.
+    for (auto& field : klass->GetSFields()) {
+      if (!field.IsPrimitiveType()) {
+        art::ObjPtr<art::mirror::Object> field_value = field.GetObject(klass);
+        if (field_value != nullptr) {
+          jvmtiHeapReferenceInfo reference_info;
+          memset(&reference_info, 0, sizeof(reference_info));
+          // TODO: Implement spec-compliant numbering. The raw field offset is used for now.
+          reference_info.field.index = field.GetOffset().Int32Value();
+
+          stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_STATIC_FIELD,
+                                                       &reference_info,
+                                                       klass,
+                                                       field_value.Ptr());
+          if (stop_reports_) {
+            return;
+          }
+        }
+      }
+    }
+  }
+
+  void MaybeEnqueue(art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    // insert() reports whether obj is new, so one lookup both marks it visited and dedups it.
+    if (visited_.insert(obj).second) {
+      worklist_.push_back(obj);
+    }
+  }
+
+  // Reports the edge referrer -> referree; queues referree if asked. Returns false on abort.
+  bool ReportReferenceMaybeEnqueue(jvmtiHeapReferenceKind kind,
+                                   const jvmtiHeapReferenceInfo* reference_info,
+                                   art::mirror::Object* referrer,
+                                   art::mirror::Object* referree)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    jint result = ReportReference(kind, reference_info, referrer, referree);
+    if ((result & JVMTI_VISIT_ABORT) != 0) {
+      return false;
+    }
+    if ((result & JVMTI_VISIT_OBJECTS) != 0) {
+      MaybeEnqueue(referree);
+    }
+    return true;
+  }
+
+  // Invokes heap_reference_callback for the edge referrer -> referree and stores back any tags the
+  // callback changed. Returns the callback's result, or 0 if there is nothing to report.
+  jint ReportReference(jvmtiHeapReferenceKind kind,
+                       const jvmtiHeapReferenceInfo* reference_info,
+                       art::mirror::Object* referrer,
+                       art::mirror::Object* referree)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    if (stop_reports_ || referree == nullptr) {
+      return 0;
+    }
+
+    const jlong referree_class_tag = tag_table_->GetTagOrZero(referree->GetClass());
+    jlong referrer_class_tag = 0;
+    if (referrer != nullptr) {
+      referrer_class_tag = tag_table_->GetTagOrZero(referrer->GetClass());
+    }
+    const jlong object_size = static_cast<jlong>(referree->SizeOf());
+    // Work on copies of the tags so changes made by the callback can be detected afterwards.
+    const jlong old_referree_tag = tag_table_->GetTagOrZero(referree);
+    jlong new_referree_tag = old_referree_tag;
+    jlong old_referrer_tag = 0;
+    jlong new_referrer_tag = 0;
+    jlong* referrer_tag_ptr = nullptr;
+    if (referrer == referree) {
+      // Self-reference: both tag pointers handed to the callback alias the same slot.
+      referrer_tag_ptr = &new_referree_tag;
+    } else if (referrer != nullptr) {
+      old_referrer_tag = new_referrer_tag = tag_table_->GetTagOrZero(referrer);
+      referrer_tag_ptr = &new_referrer_tag;
+    }
+    const jint array_length =
+        referree->IsArrayInstance() ? referree->AsArray()->GetLength() : -1;
+
+    jint result = callbacks_->heap_reference_callback(kind,
+                                                      reference_info,
+                                                      referree_class_tag,
+                                                      referrer_class_tag,
+                                                      object_size,
+                                                      &new_referree_tag,
+                                                      referrer_tag_ptr,
+                                                      array_length,
+                                                      const_cast<void*>(user_data_));
+
+    if (new_referree_tag != old_referree_tag) {
+      tag_table_->Set(referree, new_referree_tag);
+    }
+    if (new_referrer_tag != old_referrer_tag) {
+      tag_table_->Set(referrer, new_referrer_tag);
+    }
+
+    return result;
+  }
+
+  ObjectTagTable* tag_table_;
+  const jvmtiHeapCallbacks* callbacks_;
+  const void* user_data_;
+
+  std::vector<art::mirror::Object*> worklist_;
+  size_t start_;
+  static constexpr size_t kMaxStart = 1000000U;
+
+  std::unordered_set<art::mirror::Object*> visited_;
+
+  bool stop_reports_;
+
+  friend class CollectAndReportRootsVisitor;
+};
+
+// Traverses heap references with FollowReferencesHelper; heap_filter and klass are not used yet.
+jvmtiError HeapUtil::FollowReferences(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                      jint heap_filter ATTRIBUTE_UNUSED,
+                                      jclass klass ATTRIBUTE_UNUSED,
+                                      jobject initial_object,
+                                      const jvmtiHeapCallbacks* callbacks,
+                                      const void* user_data) {
+  if (callbacks == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  if (callbacks->array_primitive_value_callback != nullptr) {
+    // TODO: Implement reporting through array_primitive_value_callback.
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
+
+  art::Runtime::Current()->GetHeap()->IncrementDisableMovingGC(self);
+  {
+    art::ObjPtr<art::mirror::Object> o_initial = soa.Decode<art::mirror::Object>(initial_object);
+
+    art::ScopedThreadSuspension sts(self, art::kWaitingForVisitObjects);
+    art::ScopedSuspendAll ssa("FollowReferences");
+    FollowReferencesHelper frh(this, o_initial, callbacks, user_data);
+    frh.Init();
+    frh.Work();
+  }
+  art::Runtime::Current()->GetHeap()->DecrementDisableMovingGC(self);
+
+  return ERR(NONE);
+}
+
+// Returns local references to all classes visited by the class linker in an env-allocated array.
+jvmtiError HeapUtil::GetLoadedClasses(jvmtiEnv* env,
+                                      jint* class_count_ptr,
+                                      jclass** classes_ptr) {
+  if (class_count_ptr == nullptr || classes_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  class LocalRefCollector : public art::ClassVisitor {
+   public:
+    explicit LocalRefCollector(art::Thread* self) : self_(self) {}
+
+    bool operator()(art::ObjPtr<art::mirror::Class> klass)
+        OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      classes_.push_back(self_->GetJniEnv()->AddLocalReference<jclass>(klass));
+      return true;
+    }
+
+    art::Thread* self_;
+    std::vector<jclass> classes_;
+  };
+
+  art::Thread* self = art::Thread::Current();
+  LocalRefCollector collector(self);
+  {
+    art::ScopedObjectAccess soa(self);
+    art::Runtime::Current()->GetClassLinker()->VisitClasses(&collector);
+  }
+
+  const size_t count = collector.classes_.size();
+  jclass* result = nullptr;
+  jvmtiError alloc_ret = env->Allocate(static_cast<jlong>(count * sizeof(jclass)),
+                                       reinterpret_cast<unsigned char**>(&result));
+  if (alloc_ret != ERR(NONE)) {
+    return alloc_ret;
+  }
+  // Copy the collected references into the buffer obtained from env->Allocate().
+  for (size_t i = 0; i != count; ++i) {
+    result[i] = collector.classes_[i];
+  }
+  *class_count_ptr = static_cast<jint>(count);
+  *classes_ptr = result;
+  return ERR(NONE);
+}
+
+jvmtiError HeapUtil::ForceGarbageCollection(jvmtiEnv* env ATTRIBUTE_UNUSED) {
+  // NOTE(review): `false` is presumably the clear-soft-references flag - confirm against Heap.
+  art::Runtime::Current()->GetHeap()->CollectGarbage(false);
+  return ERR(NONE);
+}
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/heap.h b/runtime/openjdkjvmti/ti_heap.h
similarity index 71%
rename from runtime/openjdkjvmti/heap.h
rename to runtime/openjdkjvmti/ti_heap.h
index b6becb9..72ee097 100644
--- a/runtime/openjdkjvmti/heap.h
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_OPENJDKJVMTI_HEAP_H_
-#define ART_RUNTIME_OPENJDKJVMTI_HEAP_H_
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
 
 #include "jvmti.h"
 
@@ -36,6 +36,15 @@
                                 const jvmtiHeapCallbacks* callbacks,
                                 const void* user_data);
 
+  jvmtiError FollowReferences(jvmtiEnv* env,
+                              jint heap_filter,
+                              jclass klass,
+                              jobject initial_object,
+                              const jvmtiHeapCallbacks* callbacks,
+                              const void* user_data);
+
+  static jvmtiError ForceGarbageCollection(jvmtiEnv* env);
+
   ObjectTagTable* GetTags() {
     return tags_;
   }
@@ -46,4 +55,4 @@
 
 }  // namespace openjdkjvmti
 
-#endif  // ART_RUNTIME_OPENJDKJVMTI_HEAP_H_
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
index 6210936..ffa5ac7 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -34,6 +34,7 @@
 #include "art_jvmti.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "jni_internal.h"
 #include "modifiers.h"
 #include "scoped_thread_state_change-inl.h"
 
@@ -45,7 +46,7 @@
                                      char** signature_ptr,
                                      char** generic_ptr) {
   art::ScopedObjectAccess soa(art::Thread::Current());
-  art::ArtMethod* art_method = soa.DecodeMethod(method);
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   art_method = art_method->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
 
   JvmtiUniquePtr name_copy;
@@ -77,7 +78,9 @@
   }
 
   // TODO: Support generic signature.
-  *generic_ptr = nullptr;
+  if (generic_ptr != nullptr) {
+    *generic_ptr = nullptr;
+  }
 
   // Everything is fine, release the buffers.
   name_copy.release();
@@ -93,10 +96,10 @@
     return ERR(NULL_POINTER);
   }
 
-  art::ScopedObjectAccess soa(art::Thread::Current());
-  art::ArtMethod* art_method = soa.DecodeMethod(method);
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   // Note: No GetInterfaceMethodIfProxy, we want to actual class.
 
+  art::ScopedObjectAccess soa(art::Thread::Current());
   art::mirror::Class* klass = art_method->GetDeclaringClass();
   *declaring_class_ptr = soa.AddLocalReference<jclass>(klass);
 
@@ -110,9 +113,7 @@
     return ERR(NULL_POINTER);
   }
 
-  art::ScopedObjectAccess soa(art::Thread::Current());
-  art::ArtMethod* art_method = soa.DecodeMethod(method);
-
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   uint32_t modifiers = art_method->GetAccessFlags();
 
   // Note: Keep this code in sync with Executable.fixMethodFlags.
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
new file mode 100644
index 0000000..69bd887
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -0,0 +1,507 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "ti_redefine.h"
+
+#include <limits>
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "events-inl.h"
+#include "gc/allocation_listener.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti_allocator.h"
+#include "mirror/class.h"
+#include "mirror/class_ext.h"
+#include "mirror/object.h"
+#include "object_lock.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+
+namespace openjdkjvmti {
+
+// Copies dex_data into a new anonymous mmap'd region and then makes that region read-only.
+std::unique_ptr<art::MemMap> Redefiner::MoveDataToMemMap(const std::string& original_location,
+                                                         jint data_len,
+                                                         unsigned char* dex_data,
+                                                         std::string* error_msg) {
+  std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
+      art::StringPrintf("%s-transformed", original_location.c_str()).c_str(),
+      nullptr,
+      data_len,
+      PROT_READ|PROT_WRITE,
+      /*low_4gb*/false,
+      /*reuse*/false,
+      error_msg));
+  if (map == nullptr) {
+    return map;
+  }
+  memcpy(map->Begin(), dex_data, data_len);
+  // Make the dex file's mmap read-only. NOTE(review): a Protect() failure would go unnoticed.
+  map->Protect(PROT_READ);
+  return map;
+}
+
+// Redefines klass from the dex bytes in dex_data: maps and opens the new dex file, then runs a
+// Redefiner while holding the class's object lock. On failure *error_msg describes the problem.
+jvmtiError Redefiner::RedefineClass(ArtJvmTiEnv* env,
+                                    art::Runtime* runtime,
+                                    art::Thread* self,
+                                    jclass klass,
+                                    const std::string& original_dex_location,
+                                    jint data_len,
+                                    unsigned char* dex_data,
+                                    std::string* error_msg) {
+  std::unique_ptr<art::MemMap> map(
+      MoveDataToMemMap(original_dex_location, data_len, dex_data, error_msg));
+  std::ostringstream os;
+  char* generic_ptr_unused = nullptr;
+  char* signature_ptr = nullptr;
+  if (env->GetClassSignature(klass, &signature_ptr, &generic_ptr_unused) != OK) {
+    signature_ptr = const_cast<char*>("<UNKNOWN CLASS>");
+  }
+  if (map.get() == nullptr) {
+    os << "Failed to create anonymous mmap for modified dex file of class " << signature_ptr
+       << " in dex file " << original_dex_location << " because: " << *error_msg;
+    *error_msg = os.str();
+    return ERR(OUT_OF_MEMORY);
+  }
+  if (map->Size() < sizeof(art::DexFile::Header)) {
+    *error_msg = "Could not read dex file header because dex_data was too short";
+    return ERR(INVALID_CLASS_FORMAT);
+  }
+  uint32_t checksum = reinterpret_cast<const art::DexFile::Header*>(map->Begin())->checksum_;
+  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(map->GetName(),
+                                                                  checksum,
+                                                                  std::move(map),
+                                                                  /*verify*/true,
+                                                                  /*verify_checksum*/true,
+                                                                  error_msg));
+  if (dex_file.get() == nullptr) {
+    os << "Unable to load modified dex file for " << signature_ptr << ": " << *error_msg;
+    *error_msg = os.str();
+    return ERR(INVALID_CLASS_FORMAT);
+  }
+  // Get shared mutator lock.
+  art::ScopedObjectAccess soa(self);
+  art::StackHandleScope<1> hs(self);
+  Redefiner r(runtime, self, klass, signature_ptr, dex_file, error_msg);
+  // Lock around this class to avoid races.
+  art::ObjectLock<art::mirror::Class> lock(self, hs.NewHandle(r.GetMirrorClass()));
+  return r.Run();
+}
+
+// Returns the first non-null `dexFile` among the loader's pathList.dexElements, or nullptr.
+// TODO *MAJOR* This should return the actual source dalvik.system.DexFile object for the klass.
+// TODO Make mirrors of DexFile, BaseDexClassLoader and related types to make this less hellish.
+art::mirror::Object* Redefiner::FindSourceDexFileObject(
+    art::Handle<art::mirror::ClassLoader> loader) {
+  const char* dex_path_list_element_array_name = "[Ldalvik/system/DexPathList$Element;";
+  const char* dex_path_list_element_name = "Ldalvik/system/DexPathList$Element;";
+  const char* dex_file_name = "Ldalvik/system/DexFile;";
+  const char* dex_path_list_name = "Ldalvik/system/DexPathList;";
+  const char* dex_class_loader_name = "Ldalvik/system/BaseDexClassLoader;";
+
+  CHECK(!self_->IsExceptionPending());
+  art::StackHandleScope<11> hs(self_);
+  art::ClassLinker* class_linker = runtime_->GetClassLinker();
+
+  art::Handle<art::mirror::ClassLoader> null_loader(hs.NewHandle<art::mirror::ClassLoader>(
+      nullptr));
+  art::Handle<art::mirror::Class> base_dex_loader_class(hs.NewHandle(class_linker->FindClass(
+      self_, dex_class_loader_name, null_loader)));
+
+  // Look up the ArtFields used to walk from the loader down to its DexFile objects.
+  art::ArtField* path_list_field = base_dex_loader_class->FindDeclaredInstanceField(
+      "pathList", dex_path_list_name);
+  CHECK(path_list_field != nullptr);
+
+  art::ArtField* dex_path_list_element_field =
+      class_linker->FindClass(self_, dex_path_list_name, null_loader)
+        ->FindDeclaredInstanceField("dexElements", dex_path_list_element_array_name);
+  CHECK(dex_path_list_element_field != nullptr);
+
+  art::ArtField* element_dex_file_field =
+      class_linker->FindClass(self_, dex_path_list_element_name, null_loader)
+        ->FindDeclaredInstanceField("dexFile", dex_file_name);
+  CHECK(element_dex_file_field != nullptr);
+
+  // Check if loader is a BaseDexClassLoader.
+  art::Handle<art::mirror::Class> loader_class(hs.NewHandle(loader->GetClass()));
+  if (!loader_class->IsSubClass(base_dex_loader_class.Get())) {
+    LOG(ERROR) << "The classloader is not a BaseDexClassLoader which is currently the only "
+               << "supported class loader type!";
+    return nullptr;
+  }
+  // Start navigating the fields of the loader (now known to be a BaseDexClassLoader derivative)
+  art::Handle<art::mirror::Object> path_list(
+      hs.NewHandle(path_list_field->GetObject(loader.Get())));
+  CHECK(path_list.Get() != nullptr);
+  CHECK(!self_->IsExceptionPending());
+  art::Handle<art::mirror::ObjectArray<art::mirror::Object>> dex_elements_list(hs.NewHandle(
+      dex_path_list_element_field->GetObject(path_list.Get())->
+      AsObjectArray<art::mirror::Object>()));
+  CHECK(!self_->IsExceptionPending());
+  CHECK(dex_elements_list.Get() != nullptr);
+  size_t num_elements = dex_elements_list->GetLength();
+  art::MutableHandle<art::mirror::Object> current_element(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  art::MutableHandle<art::mirror::Object> first_dex_file(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  // Iterate over the DexPathList$Elements to find the right one.
+  // TODO Not yet: at the moment we just return the first non-null dexFile.
+  for (size_t i = 0; i < num_elements; i++) {
+    current_element.Assign(dex_elements_list->Get(i));
+    CHECK(current_element.Get() != nullptr);
+    CHECK(!self_->IsExceptionPending());
+    CHECK(dex_elements_list.Get() != nullptr);
+    CHECK_EQ(current_element->GetClass(), class_linker->FindClass(self_,
+                                                                  dex_path_list_element_name,
+                                                                  null_loader));
+    // TODO It would be cleaner to put the art::DexFile into the dalvik.system.DexFile the class
+    // comes from but it is more annoying because we would need to find this class. It is not
+    // necessary for proper function since we just need to be in front of the class's old dex file
+    // in the path.
+    first_dex_file.Assign(element_dex_file_field->GetObject(current_element.Get()));
+    if (first_dex_file.Get() != nullptr) {
+      return first_dex_file.Get();
+    }
+  }
+  return nullptr;
+}
+
+art::mirror::Class* Redefiner::GetMirrorClass() {  // Decodes klass_ into its mirror::Class.
+  return self_->DecodeJObject(klass_)->AsClass();
+}
+
+art::mirror::ClassLoader* Redefiner::GetClassLoader() {  // Class loader of GetMirrorClass().
+  return GetMirrorClass()->GetClassLoader();
+}
+
+art::mirror::DexCache* Redefiner::CreateNewDexCache(art::Handle<art::mirror::ClassLoader> loader) {
+  return runtime_->GetClassLinker()->RegisterDexFile(*dex_file_, loader.Get());
+}  // FinishRemainingAllocations() treats a null result as an out-of-memory failure.
+
+// TODO Really wishing I had that mirror of dalvik.system.DexFile now.
+art::mirror::LongArray* Redefiner::AllocateDexFileCookie(
+    art::Handle<art::mirror::Object> java_dex_file_obj) {
+  art::StackHandleScope<2> hs(self_);
+  // mCookie is nulled out if the DexFile has been closed but mInternalCookie sticks around until
+  // the object is finalized. Since they always point to the same array if mCookie is not null we
+  // just use the mInternalCookie field. We will update one or both of these fields later.
+  // TODO Should I get the class from the classloader or directly?
+  art::ArtField* internal_cookie_field = java_dex_file_obj->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  // TODO Add check that mCookie is either null or same as mInternalCookie.
+  CHECK(internal_cookie_field != nullptr);
+  art::Handle<art::mirror::LongArray> cookie(
+      hs.NewHandle(internal_cookie_field->GetObject(java_dex_file_obj.Get())->AsLongArray()));
+  // TODO Maybe make these non-fatal.
+  CHECK(cookie.Get() != nullptr);
+  CHECK_GE(cookie->GetLength(), 1);
+  art::Handle<art::mirror::LongArray> new_cookie(
+      hs.NewHandle(art::mirror::LongArray::Alloc(self_, cookie->GetLength() + 1)));
+  if (new_cookie.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    return nullptr;
+  }
+  // Keep slot 0 (the oat-dex field), put the new dex file in slot 1, then append the old entries.
+  // TODO Should I clear this field?
+  // TODO This is a really crappy thing here with the first element being different.
+  new_cookie->SetWithoutChecks<false>(0, cookie->GetWithoutChecks(0));
+  new_cookie->SetWithoutChecks<false>(
+      1, static_cast<int64_t>(reinterpret_cast<intptr_t>(dex_file_.get())));
+  new_cookie->Memcpy(2, cookie.Get(), 1, cookie->GetLength() - 1);
+  return new_cookie.Get();
+}
+
+void Redefiner::RecordFailure(jvmtiError result, const std::string& error_msg) {
+  // Remember the error code and publish a message that names the class signature.
+  result_ = result;
+  *error_msg_ = art::StringPrintf(
+      "Unable to perform redefinition of '%s': %s", class_sig_, error_msg.c_str());
+}
+
+bool Redefiner::FinishRemainingAllocations(
+    /*out*/art::MutableHandle<art::mirror::ClassLoader>* source_class_loader,
+    /*out*/art::MutableHandle<art::mirror::Object>* java_dex_file_obj,
+    /*out*/art::MutableHandle<art::mirror::LongArray>* new_dex_file_cookie,
+    /*out*/art::MutableHandle<art::mirror::DexCache>* new_dex_cache) {
+  art::StackHandleScope<4> hs(self_);
+  // This shouldn't allocate
+  art::Handle<art::mirror::ClassLoader> loader(hs.NewHandle(GetClassLoader()));
+  if (loader.Get() == nullptr) {
+    // TODO Better error msg.
+    RecordFailure(ERR(INTERNAL), "Unable to find class loader!");
+    return false;
+  }
+  art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(FindSourceDexFileObject(loader)));
+  if (dex_file_obj.Get() == nullptr) {
+    // TODO Better error msg.
+    RecordFailure(ERR(INTERNAL), "Unable to find class loader!");
+    return false;
+  }
+  art::Handle<art::mirror::LongArray> new_cookie(hs.NewHandle(AllocateDexFileCookie(dex_file_obj)));
+  if (new_cookie.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate dex file array for class loader");
+    return false;
+  }
+  art::Handle<art::mirror::DexCache> dex_cache(hs.NewHandle(CreateNewDexCache(loader)));
+  if (dex_cache.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate DexCache");
+    return false;
+  }
+  source_class_loader->Assign(loader.Get());
+  java_dex_file_obj->Assign(dex_file_obj.Get());
+  new_dex_file_cookie->Assign(new_cookie.Get());
+  new_dex_cache->Assign(dex_cache.Get());
+  return true;
+}
+
+// Validates and allocates, then installs the redefinition under SuspendAll; returns OK or result_.
+jvmtiError Redefiner::Run() {
+  art::StackHandleScope<5> hs(self_);
+  // TODO We might want to have a global lock (or one based on the class being redefined at least)
+  // in order to make cleanup easier. Not a huge deal though.
+  //
+  // First we just allocate the ClassExt and its fields that we need. These can be updated
+  // atomically without any issues (since we allocate the map arrays as empty) so we don't bother
+  // doing a try loop. The other allocations we need to ensure that nothing has changed in the time
+  // between allocating them and pausing all threads before we can update them so we need to do a
+  // try loop.
+  if (!EnsureRedefinitionIsValid() || !EnsureClassAllocationsFinished()) {
+    return result_;
+  }
+  art::MutableHandle<art::mirror::ClassLoader> source_class_loader(
+      hs.NewHandle<art::mirror::ClassLoader>(nullptr));
+  art::MutableHandle<art::mirror::Object> java_dex_file(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  art::MutableHandle<art::mirror::LongArray> new_dex_file_cookie(
+      hs.NewHandle<art::mirror::LongArray>(nullptr));
+  art::MutableHandle<art::mirror::DexCache> new_dex_cache(
+      hs.NewHandle<art::mirror::DexCache>(nullptr));
+  if (!FinishRemainingAllocations(&source_class_loader,
+                                  &java_dex_file,
+                                  &new_dex_file_cookie,
+                                  &new_dex_cache)) {
+    // TODO Null out the ClassExt fields we allocated (if possible, might be racing with another
+    // redefineclass call which made it even bigger). Leak shouldn't be huge (2x array of size
+    // declared_methods_.length) but would be good to get rid of.
+    // new_dex_file_cookie & new_dex_cache should be cleaned up by the GC.
+    return result_;
+  }
+  // Get the mirror class now that we aren't allocating anymore.
+  art::Handle<art::mirror::Class> art_class(hs.NewHandle(GetMirrorClass()));
+  // Enable assertion that this thread isn't interrupted during this installation.
+  // After this we will need to do real cleanup in case of failure. Prior to this we could simply
+  // return and would let everything get cleaned up or harmlessly leaked.
+  // Do transition to final suspension
+  // TODO We might want to give this its own suspended state!
+  // TODO This isn't right. We need to change state without any chance of suspend ideally!
+  self_->TransitionFromRunnableToSuspended(art::ThreadState::kNative);
+  runtime_->GetThreadList()->SuspendAll(
+      "Final installation of redefined Class!", /*long_suspend*/true);
+  // TODO Might want to move this into a different type.
+  // Now we reach the part where we must do active cleanup if something fails.
+  // TODO We should really Retry if this fails instead of simply aborting.
+  // Setting the new DexFileCookie returns the original so we can restore it if redefinition fails.
+  art::ObjPtr<art::mirror::LongArray> original_dex_file_cookie(nullptr);
+  if (!UpdateJavaDexFile(java_dex_file.Get(),
+                         new_dex_file_cookie.Get(),
+                         &original_dex_file_cookie)) {
+    // Release suspendAll
+    runtime_->GetThreadList()->ResumeAll();
+    // Get back shared mutator lock as expected for return.
+    self_->TransitionFromSuspendedToRunnable();
+    return result_;
+  }
+  if (!UpdateClass(art_class.Get(), new_dex_cache.Get())) {
+    // TODO Should have some form of scope to do this.
+    RestoreJavaDexFile(java_dex_file.Get(), original_dex_file_cookie);
+    // Release suspendAll
+    runtime_->GetThreadList()->ResumeAll();
+    // Get back shared mutator lock as expected for return.
+    self_->TransitionFromSuspendedToRunnable();
+    return result_;
+  }
+  // Update the ClassObjects. Keep the old DexCache (and other stuff) around so we can restore
+  // functions/fields.
+  // Verify the new Class.
+  //   Failure then undo updates to class
+  // Do stack walks and allocate obsolete methods
+  // Shrink the obsolete method maps if possible?
+  // TODO find appropriate class loader. Allocate new dex files array. Pause all java threads.
+  // Replace dex files array. Do stack scan + allocate obsoletes. Remove array if possible.
+  // TODO We might want to ensure that all threads are stopped for this!
+  // AddDexToClassPath();
+  // Release suspendAll
+  // TODO Put this into a scoped thing.
+  runtime_->GetThreadList()->ResumeAll();
+  // Get back shared mutator lock as expected for return.
+  self_->TransitionFromSuspendedToRunnable();
+  // TODO Do this at a more reasonable place.
+  dex_file_.release();
+  return OK;
+}
+
+void Redefiner::RestoreJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                                   art::ObjPtr<art::mirror::LongArray> orig_cookie) {  // Undo step.
+  art::ArtField* internal_cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  art::ArtField* cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mCookie", "Ljava/lang/Object;");  // NOTE(review): neither lookup is null-checked here.
+  art::ObjPtr<art::mirror::LongArray> new_cookie(
+      cookie_field->GetObject(java_dex_file)->AsLongArray());  // Current mCookie value.
+  internal_cookie_field->SetObject<false>(java_dex_file, orig_cookie);  // Always restored.
+  if (!new_cookie.IsNull()) {  // mCookie is only rewritten when it currently holds a value.
+    cookie_field->SetObject<false>(java_dex_file, orig_cookie);
+  }
+}
+
+// Re-points mclass and its ArtMethods at dex_file_ and new_dex_cache so the class can be verified.
+bool Redefiner::UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                            art::ObjPtr<art::mirror::DexCache> new_dex_cache) {
+  art::ClassLinker* linker = runtime_->GetClassLinker();
+  art::PointerSize image_pointer_size = linker->GetImagePointerSize();
+  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
+      *dex_file_, class_sig_, art::ComputeModifiedUtf8Hash(class_sig_));
+  if (class_def == nullptr) {  // The new dex file does not define class_sig_.
+    RecordFailure(ERR(INVALID_CLASS_FORMAT), "Unable to find ClassDef!");
+    return false;
+  }
+  const art::DexFile::TypeId& declaring_class_id = dex_file_->GetTypeId(class_def->class_idx_);
+  const art::DexFile& old_dex_file = mclass->GetDexFile();  // Still old: class is updated last.
+  for (art::ArtMethod& method : mclass->GetMethods(image_pointer_size)) {
+    const art::DexFile::StringId* new_name_id = dex_file_->FindStringId(method.GetName());
+    art::dex::TypeIndex method_return_idx =  // NOTE(review): FindTypeId result is not null-checked.
+        dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(method.GetReturnTypeDescriptor()));
+    const auto* old_type_list = method.GetParameterTypeList();
+    std::vector<art::dex::TypeIndex> new_type_list;
+    for (uint32_t i = 0; old_type_list != nullptr && i < old_type_list->Size(); i++) {
+      new_type_list.push_back(  // Map each parameter type to its index in the new dex file.
+          dex_file_->GetIndexForTypeId(
+              *dex_file_->FindTypeId(
+                  old_dex_file.GetTypeDescriptor(
+                      old_dex_file.GetTypeId(
+                          old_type_list->GetTypeItem(i).type_idx_)))));
+    }
+    const art::DexFile::ProtoId* proto_id = dex_file_->FindProtoId(method_return_idx,
+                                                                   new_type_list);
+    CHECK(new_name_id != nullptr && proto_id != nullptr);  // Both are dereferenced just below.
+    // TODO Return false, cleanup.
+    const art::DexFile::MethodId* method_id = dex_file_->FindMethodId(declaring_class_id,
+                                                                      *new_name_id,
+                                                                      *proto_id);
+    CHECK(method_id != nullptr);
+    // TODO Return false, cleanup.
+    uint32_t dex_method_idx = dex_file_->GetIndexForMethodId(*method_id);
+    method.SetDexMethodIndex(dex_method_idx);
+    linker->SetEntryPointsToInterpreter(&method);
+    method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(*class_def, dex_method_idx));
+    method.SetDexCacheResolvedMethods(new_dex_cache->GetResolvedMethods(), image_pointer_size);
+    method.SetDexCacheResolvedTypes(new_dex_cache->GetResolvedTypes(), image_pointer_size);
+  }
+  // Update the class fields.
+  // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
+  // to call GetReturnTypeDescriptor and GetParameterTypeList above).
+  mclass->SetDexCache(new_dex_cache.Ptr());
+  mclass->SetDexCacheStrings(new_dex_cache->GetStrings());
+  mclass->SetDexClassDefIndex(dex_file_->GetIndexForClassDef(*class_def));
+  mclass->SetDexTypeIndex(dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(class_sig_)));
+  return true;
+}
+
+bool Redefiner::UpdateJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                                  art::ObjPtr<art::mirror::LongArray> new_cookie,
+                                  /*out*/art::ObjPtr<art::mirror::LongArray>* original_cookie) {
+  art::ArtField* internal_cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  art::ArtField* cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mCookie", "Ljava/lang/Object;");
+  CHECK(internal_cookie_field != nullptr);  // NOTE(review): cookie_field is not checked.
+  art::ObjPtr<art::mirror::LongArray> orig_internal_cookie(
+      internal_cookie_field->GetObject(java_dex_file)->AsLongArray());
+  art::ObjPtr<art::mirror::LongArray> orig_cookie(
+      cookie_field->GetObject(java_dex_file)->AsLongArray());
+  internal_cookie_field->SetObject<false>(java_dex_file, new_cookie);
+  *original_cookie = orig_internal_cookie;  // Hands back the previous mInternalCookie value.
+  if (!orig_cookie.IsNull()) {  // mCookie is only overwritten when it was non-null.
+    cookie_field->SetObject<false>(java_dex_file, new_cookie);
+  }
+  return true;  // No failure path exists here; the result is always true.
+}
+
+// This function does all (java) allocations we need to do for the Class being redefined.
+// TODO Change this name maybe?
+bool Redefiner::EnsureClassAllocationsFinished() {
+  art::StackHandleScope<2> hs(self_);  // Two handles: klass and ext.
+  art::Handle<art::mirror::Class> klass(hs.NewHandle(self_->DecodeJObject(klass_)->AsClass()));
+  if (klass.Get() == nullptr) {
+    RecordFailure(ERR(INVALID_CLASS), "Unable to decode class argument!");
+    return false;
+  }
+  // Allocate the classExt
+  art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->EnsureExtDataPresent(self_)));
+  if (ext.Get() == nullptr) {
+    // No memory. Clear exception (it's not useful) and return error.
+    // TODO This doesn't need to be fatal. We could just not support obsolete methods after hitting
+    // this case.
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Could not allocate ClassExt");
+    return false;
+  }
+  // Allocate the 2 arrays that make up the obsolete methods map.  Since the contents of the arrays
+  // are only modified when all threads (other than the modifying one) are suspended we don't need
+  // to worry about missing unsynchronized writes to the array. We do synchronize when setting it
+  // however, since that can happen at any time.
+  // TODO Clear these after we walk the stacks in order to free them in the (likely?) event there
+  // are no obsolete methods.
+  {
+    art::ObjectLock<art::mirror::ClassExt> lock(self_, ext);  // Held only for this scope.
+    if (!ext->ExtendObsoleteArrays(
+          self_, klass->GetDeclaredMethodsSlice(art::kRuntimePointerSize).size())) {
+      // OOM. Clear exception and return error.
+      self_->AssertPendingOOMException();
+      self_->ClearException();
+      RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate/extend obsolete methods map");
+      return false;
+    }
+  }
+  return true;  // Both the ClassExt and the obsolete-method arrays were obtained.
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
new file mode 100644
index 0000000..f3a5834
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -0,0 +1,168 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+
+#include <string>
+
+#include <jni.h>
+
+#include "art_jvmti.h"
+#include "art_method.h"
+#include "class_linker.h"
+#include "dex_file.h"
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti.h"
+#include "linear_alloc.h"
+#include "mem_map.h"
+#include "mirror/array-inl.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
+#include "mirror/class_loader-inl.h"
+#include "mirror/string-inl.h"
+#include "oat_file.h"
+#include "obj_ptr.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread_list.h"
+#include "transform.h"
+#include "utf.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace openjdkjvmti {
+
+// Class that can redefine a single class's methods.
+class Redefiner {
+ public:
+  // Redefine the given class with the given dex data. Note this function does not take ownership of
+  // the dex_data pointer. It is not used after this call however and may be freed if desired.
+  // The caller is responsible for freeing it. The runtime makes its own copy of the data.
+  static jvmtiError RedefineClass(ArtJvmTiEnv* env,
+                                  art::Runtime* runtime,
+                                  art::Thread* self,
+                                  jclass klass,
+                                  const std::string& original_dex_location,
+                                  jint data_len,
+                                  unsigned char* dex_data,
+                                  std::string* error_msg);
+
+ private:
+  jvmtiError result_;  // Initialized to ERR(INTERNAL) by the constructor.
+  art::Runtime* runtime_;
+  art::Thread* self_;
+  // Kept as a jclass since we have weird run-state changes that make keeping it around as a
+  // mirror::Class difficult and confusing.
+  jclass klass_;
+  std::unique_ptr<const art::DexFile> dex_file_;  // Moved in by the constructor.
+  std::string* error_msg_;
+  char* class_sig_;  // Class descriptor; used to find the ClassDef in dex_file_.
+
+  // TODO Maybe change jclass to a mirror::Class
+  Redefiner(art::Runtime* runtime,
+            art::Thread* self,
+            jclass klass,
+            char* class_sig,
+            std::unique_ptr<const art::DexFile>& redefined_dex_file,  // Emptied by the move below.
+            std::string* error_msg)
+      : result_(ERR(INTERNAL)),
+        runtime_(runtime),
+        self_(self),
+        klass_(klass),
+        dex_file_(std::move(redefined_dex_file)),
+        error_msg_(error_msg),
+        class_sig_(class_sig) { }
+
+  static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
+                                                       jint data_len,
+                                                       unsigned char* dex_data,
+                                                       std::string* error_msg);
+
+  // TODO Put on all the lock qualifiers.
+  jvmtiError Run() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  bool FinishRemainingAllocations(
+        /*out*/art::MutableHandle<art::mirror::ClassLoader>* source_class_loader,
+        /*out*/art::MutableHandle<art::mirror::Object>* source_dex_file_obj,
+        /*out*/art::MutableHandle<art::mirror::LongArray>* new_dex_file_cookie,
+        /*out*/art::MutableHandle<art::mirror::DexCache>* new_dex_cache)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // Preallocates all needed allocations in klass so that we can pause execution safely.
+  // TODO We should be able to free the arrays if they end up not being used. Investigate doing this
+  // in the future. For now we will just take the memory hit.
+  bool EnsureClassAllocationsFinished() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // This finds the java.lang.DexFile we will add the native DexFile to as part of the classpath.
+  // TODO Make sure the DexFile object returned is the one that the klass_ actually comes from.
+  art::mirror::Object* FindSourceDexFileObject(art::Handle<art::mirror::ClassLoader> loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::Class* GetMirrorClass() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // Allocates and fills the new DexFileCookie
+  art::mirror::LongArray* AllocateDexFileCookie(art::Handle<art::mirror::Object> java_dex_file_obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::DexCache* CreateNewDexCache(art::Handle<art::mirror::ClassLoader> loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  void RecordFailure(jvmtiError result, const std::string& error_msg);
+
+  // TODO Actually write this.
+  // This will check that no constraints are violated (more than 1 class in dex file, any changes in
+  // number/declaration of methods & fields, changes in access flags, etc.)
+  bool EnsureRedefinitionIsValid() {
+    return true;  // Placeholder: no validation is performed yet.
+  }
+
+  bool UpdateJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                         art::ObjPtr<art::mirror::LongArray> new_cookie,
+                         /*out*/art::ObjPtr<art::mirror::LongArray>* original_cookie)
+      REQUIRES(art::Locks::mutator_lock_);  // Exclusive hold, unlike REQUIRES_SHARED above.
+
+  void RestoreJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                          art::ObjPtr<art::mirror::LongArray> original_cookie)
+      REQUIRES(art::Locks::mutator_lock_);
+
+  bool UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                   art::ObjPtr<art::mirror::DexCache> new_dex_cache)
+      REQUIRES(art::Locks::mutator_lock_);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 33e677f..6f8976f 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -37,6 +37,7 @@
 #include "dex_file.h"
 #include "dex_file_annotations.h"
 #include "jni_env_ext.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/dex_cache.h"
 #include "scoped_thread_state_change-inl.h"
@@ -64,7 +65,7 @@
 
     if (start == 0) {
       m = m->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
-      jmethodID id = soa.EncodeMethod(m);
+      jmethodID id = art::jni::EncodeArtMethod(m);
 
       art::mirror::DexCache* dex_cache = m->GetDexCache();
       int32_t line_number = -1;
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 3443aea..f7b8b92 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -29,10 +29,15 @@
  * questions.
  */
 
+#include <unordered_map>
+#include <unordered_set>
+
 #include "transform.h"
 
+#include "art_method.h"
 #include "class_linker.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "gc_root-inl.h"
 #include "globals.h"
 #include "jni_env_ext-inl.h"
@@ -45,6 +50,7 @@
 #include "mirror/string-inl.h"
 #include "oat_file.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 #include "transform.h"
 #include "utf.h"
@@ -52,196 +58,7 @@
 
 namespace openjdkjvmti {
 
-static bool ReadChecksum(jint data_len, const unsigned char* dex, /*out*/uint32_t* res) {
-  if (data_len < static_cast<jint>(sizeof(art::DexFile::Header))) {
-    return false;
-  }
-  *res = reinterpret_cast<const art::DexFile::Header*>(dex)->checksum_;
-  return true;
-}
-
-static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
-                                                      jint data_len,
-                                                      unsigned char* dex_data) {
-  std::string error_msg;
-  std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
-      art::StringPrintf("%s-transformed", original_location.c_str()).c_str(),
-      nullptr,
-      data_len,
-      PROT_READ|PROT_WRITE,
-      /*low_4gb*/false,
-      /*reuse*/false,
-      &error_msg));
-  if (map == nullptr) {
-    return map;
-  }
-  memcpy(map->Begin(), dex_data, data_len);
-  map->Protect(PROT_READ);
-  return map;
-}
-
-static void InvalidateExistingMethods(art::Thread* self,
-                                      art::Handle<art::mirror::Class> klass,
-                                      art::Handle<art::mirror::DexCache> cache,
-                                      const art::DexFile* dex_file)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  // Create new DexCache with new DexFile.
-  // reset dex_class_def_idx_
-  // for each method reset entry_point_from_quick_compiled_code_ to bridge
-  // for each method reset dex_code_item_offset_
-  // for each method reset dex_method_index_
-  // for each method set dex_cache_resolved_methods_ to new DexCache
-  // for each method set dex_cache_resolved_types_ to new DexCache
-  auto* runtime = art::Runtime::Current();
-  art::ClassLinker* linker = runtime->GetClassLinker();
-  art::PointerSize image_pointer_size = linker->GetImagePointerSize();
-  std::string descriptor_storage;
-  const char* descriptor = klass->GetDescriptor(&descriptor_storage);
-  // Get the new class def
-  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
-      *dex_file, descriptor, art::ComputeModifiedUtf8Hash(descriptor));
-  CHECK(class_def != nullptr);
-  const art::DexFile::TypeId& declaring_class_id = dex_file->GetTypeId(class_def->class_idx_);
-  art::StackHandleScope<6> hs(self);
-  const art::DexFile& old_dex_file = klass->GetDexFile();
-  for (art::ArtMethod& method : klass->GetMethods(image_pointer_size)) {
-    // Find the code_item for the method then find the dex_method_index and dex_code_item_offset to
-    // set.
-    const art::DexFile::StringId* new_name_id = dex_file->FindStringId(method.GetName());
-    uint16_t method_return_idx =
-        dex_file->GetIndexForTypeId(*dex_file->FindTypeId(method.GetReturnTypeDescriptor()));
-    const auto* old_type_list = method.GetParameterTypeList();
-    std::vector<uint16_t> new_type_list;
-    for (uint32_t i = 0; old_type_list != nullptr && i < old_type_list->Size(); i++) {
-      new_type_list.push_back(
-          dex_file->GetIndexForTypeId(
-              *dex_file->FindTypeId(
-                  old_dex_file.GetTypeDescriptor(
-                      old_dex_file.GetTypeId(
-                          old_type_list->GetTypeItem(i).type_idx_)))));
-    }
-    const art::DexFile::ProtoId* proto_id = dex_file->FindProtoId(method_return_idx,
-                                                                  new_type_list);
-    CHECK(proto_id != nullptr || old_type_list == nullptr);
-    const art::DexFile::MethodId* method_id = dex_file->FindMethodId(declaring_class_id,
-                                                                      *new_name_id,
-                                                                      *proto_id);
-    CHECK(method_id != nullptr);
-    uint32_t dex_method_idx = dex_file->GetIndexForMethodId(*method_id);
-    method.SetDexMethodIndex(dex_method_idx);
-    linker->SetEntryPointsToInterpreter(&method);
-    method.SetCodeItemOffset(dex_file->FindCodeItemOffset(*class_def, dex_method_idx));
-    method.SetDexCacheResolvedMethods(cache->GetResolvedMethods(), image_pointer_size);
-    method.SetDexCacheResolvedTypes(cache->GetResolvedTypes(), image_pointer_size);
-  }
-
-  // Update the class fields.
-  // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
-  // to call GetReturnTypeDescriptor and GetParameterTypeList above).
-  klass->SetDexCache(cache.Get());
-  klass->SetDexCacheStrings(cache->GetStrings());
-  klass->SetDexClassDefIndex(dex_file->GetIndexForClassDef(*class_def));
-  klass->SetDexTypeIndex(dex_file->GetIndexForTypeId(*dex_file->FindTypeId(descriptor)));
-}
-
-// Adds the dex file.
-static art::mirror::LongArray* InsertDexFileIntoArray(art::Thread* self,
-                                                      const art::DexFile* dex,
-                                                      art::Handle<art::mirror::LongArray>& orig)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  art::StackHandleScope<1> hs(self);
-  CHECK_GE(orig->GetLength(), 1);
-  art::Handle<art::mirror::LongArray> ret(
-      hs.NewHandle(art::mirror::LongArray::Alloc(self, orig->GetLength() + 1)));
-  CHECK(ret.Get() != nullptr);
-  // Copy the oat-dex.
-  // TODO Should I clear the oatdex element?
-  ret->SetWithoutChecks<false>(0, orig->GetWithoutChecks(0));
-  ret->SetWithoutChecks<false>(1, static_cast<int64_t>(reinterpret_cast<intptr_t>(dex)));
-  ret->Memcpy(2, orig.Get(), 1, orig->GetLength() - 1);
-  return ret.Get();
-}
-
-// TODO Handle all types of class loaders.
-static bool FindDalvikSystemDexFileAndLoaderForClass(
-    art::Handle<art::mirror::Class> klass,
-    /*out*/art::mirror::Object** dex_file,
-    /*out*/art::mirror::ClassLoader** loader)
-      REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  const char* dex_path_list_element_array_name = "[Ldalvik/system/DexPathList$Element;";
-  const char* dex_path_list_element_name = "Ldalvik/system/DexPathList$Element;";
-  const char* dex_file_name = "Ldalvik/system/DexFile;";
-  const char* dex_path_list_name = "Ldalvik/system/DexPathList;";
-  const char* dex_class_loader_name = "Ldalvik/system/BaseDexClassLoader;";
-
-  art::Thread* self = art::Thread::Current();
-  CHECK(!self->IsExceptionPending());
-  art::StackHandleScope<11> hs(self);
-  art::ClassLinker* class_linker = art::Runtime::Current()->GetClassLinker();
-
-  art::Handle<art::mirror::ClassLoader> null_loader(hs.NewHandle<art::mirror::ClassLoader>(
-      nullptr));
-  art::Handle<art::mirror::Class> base_dex_loader_class(hs.NewHandle(class_linker->FindClass(
-      self, dex_class_loader_name, null_loader)));
-
-  art::ArtField* path_list_field = base_dex_loader_class->FindDeclaredInstanceField(
-      "pathList", dex_path_list_name);
-  CHECK(path_list_field != nullptr);
-
-  art::ArtField* dex_path_list_element_field =
-      class_linker->FindClass(self, dex_path_list_name, null_loader)
-        ->FindDeclaredInstanceField("dexElements", dex_path_list_element_array_name);
-  CHECK(dex_path_list_element_field != nullptr);
-
-  art::ArtField* element_dex_file_field =
-      class_linker->FindClass(self, dex_path_list_element_name, null_loader)
-        ->FindDeclaredInstanceField("dexFile", dex_file_name);
-  CHECK(element_dex_file_field != nullptr);
-
-  art::Handle<art::mirror::ClassLoader> h_class_loader(hs.NewHandle(klass->GetClassLoader()));
-  art::Handle<art::mirror::Class> loader_class(hs.NewHandle(h_class_loader->GetClass()));
-  // Check if loader is a BaseDexClassLoader
-  if (!loader_class->IsSubClass(base_dex_loader_class.Get())) {
-    LOG(ERROR) << "The classloader is not a BaseDexClassLoader which is currently the only "
-               << "supported class loader type!";
-    return false;
-  }
-  art::Handle<art::mirror::Object> path_list(
-      hs.NewHandle(path_list_field->GetObject(h_class_loader.Get())));
-  CHECK(path_list.Get() != nullptr);
-  CHECK(!self->IsExceptionPending());
-  art::Handle<art::mirror::ObjectArray<art::mirror::Object>> dex_elements_list(hs.NewHandle(
-      dex_path_list_element_field->GetObject(path_list.Get())->
-      AsObjectArray<art::mirror::Object>()));
-  CHECK(!self->IsExceptionPending());
-  CHECK(dex_elements_list.Get() != nullptr);
-  size_t num_elements = dex_elements_list->GetLength();
-  art::MutableHandle<art::mirror::Object> current_element(
-      hs.NewHandle<art::mirror::Object>(nullptr));
-  art::MutableHandle<art::mirror::Object> first_dex_file(
-      hs.NewHandle<art::mirror::Object>(nullptr));
-  for (size_t i = 0; i < num_elements; i++) {
-    current_element.Assign(dex_elements_list->Get(i));
-    CHECK(current_element.Get() != nullptr);
-    CHECK(!self->IsExceptionPending());
-    CHECK(dex_elements_list.Get() != nullptr);
-    CHECK_EQ(current_element->GetClass(), class_linker->FindClass(self,
-                                                                  dex_path_list_element_name,
-                                                                  null_loader));
-    // TODO It would be cleaner to put the art::DexFile into the dalvik.system.DexFile the class
-    // comes from but it is more annoying because we would need to find this class. It is not
-    // necessary for proper function since we just need to be in front of the classes old dex file
-    // in the path.
-    first_dex_file.Assign(element_dex_file_field->GetObject(current_element.Get()));
-    if (first_dex_file.Get() != nullptr) {
-      *dex_file = first_dex_file.Get();
-      *loader = h_class_loader.Get();
-      return true;
-    }
-  }
-  return false;
-}
-
+// TODO Move this function somewhere more appropriate.
 // Gets the data surrounding the given class.
 jvmtiError GetTransformationData(ArtJvmTiEnv* env,
                                  jclass klass,
@@ -280,83 +97,4 @@
   return OK;
 }
 
-// Install the new dex file.
-// TODO do error checks for bad state (method in a stack, changes to number of methods/fields/etc).
-jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          std::string original_location,
-                                          jint data_len,
-                                          unsigned char* dex_data) {
-  const char* dex_file_name = "Ldalvik/system/DexFile;";
-  art::Thread* self = art::Thread::Current();
-  art::Runtime* runtime = art::Runtime::Current();
-  art::ThreadList* threads = runtime->GetThreadList();
-  art::ClassLinker* class_linker = runtime->GetClassLinker();
-  uint32_t checksum = 0;
-  if (!ReadChecksum(data_len, dex_data, &checksum)) {
-    return ERR(INVALID_CLASS_FORMAT);
-  }
-
-  std::unique_ptr<art::MemMap> map(MoveDataToMemMap(original_location, data_len, dex_data));
-  if (map.get() == nullptr) {
-    return ERR(INTERNAL);
-  }
-  std::string error_msg;
-  // Load the new dex_data in memory (mmap it, etc)
-  std::unique_ptr<const art::DexFile> new_dex_file = art::DexFile::Open(map->GetName(),
-                                                                        checksum,
-                                                                        std::move(map),
-                                                                        /*verify*/ true,
-                                                                        /*verify_checksum*/ true,
-                                                                        &error_msg);
-  CHECK(new_dex_file.get() != nullptr) << "Unable to load dex file! " << error_msg;
-
-  // Get mutator lock. We need the lifetimes of these variables (hs, the classes, etc.) to be longer
-  // then current lock (since there isn't upgrading of the lock) so we don't use soa.
-  art::ThreadState old_state = self->TransitionFromSuspendedToRunnable();
-  // This scope is needed to make sure that the HandleScope dies with mutator_lock_ since we need to
-  // upgrade the mutator_lock during the execution.
-  {
-    art::StackHandleScope<11> hs(self);
-    art::Handle<art::mirror::ClassLoader> null_loader(
-        hs.NewHandle<art::mirror::ClassLoader>(nullptr));
-    CHECK(null_loader.Get() == nullptr);
-    art::ArtField* dex_file_cookie_field = class_linker->
-        FindClass(self, dex_file_name, null_loader)->
-        FindDeclaredInstanceField("mCookie", "Ljava/lang/Object;");
-    art::ArtField* dex_file_internal_cookie_field =
-        class_linker->FindClass(self, dex_file_name, null_loader)
-          ->FindDeclaredInstanceField("mInternalCookie", "Ljava/lang/Object;");
-    CHECK(dex_file_cookie_field != nullptr);
-    art::Handle<art::mirror::Class> klass(hs.NewHandle(self->DecodeJObject(jklass)->AsClass()));
-    art::mirror::Object* dex_file_ptr = nullptr;
-    art::mirror::ClassLoader* class_loader_ptr = nullptr;
-    // Find dalvik.system.DexFile that represents the dex file we are changing.
-    if (!FindDalvikSystemDexFileAndLoaderForClass(klass, &dex_file_ptr, &class_loader_ptr)) {
-      self->TransitionFromRunnableToSuspended(old_state);
-      LOG(ERROR) << "Could not find DexFile.";
-      return ERR(INTERNAL);
-    }
-    art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(dex_file_ptr));
-    art::Handle<art::mirror::ClassLoader> class_loader(hs.NewHandle(class_loader_ptr));
-    art::Handle<art::mirror::LongArray> art_dex_array(
-        hs.NewHandle<art::mirror::LongArray>(
-            dex_file_cookie_field->GetObject(dex_file_obj.Get())->AsLongArray()));
-    art::Handle<art::mirror::LongArray> new_art_dex_array(
-        hs.NewHandle<art::mirror::LongArray>(
-            InsertDexFileIntoArray(self, new_dex_file.get(), art_dex_array)));
-    art::Handle<art::mirror::DexCache> cache(
-        hs.NewHandle(class_linker->RegisterDexFile(*new_dex_file.get(), class_loader.Get())));
-    self->TransitionFromRunnableToSuspended(old_state);
-
-    threads->SuspendAll("moving dex file into runtime", /*long_suspend*/true);
-    // Change the mCookie field. Old value will be GC'd as normal.
-    dex_file_cookie_field->SetObject<false>(dex_file_obj.Get(), new_art_dex_array.Get());
-    dex_file_internal_cookie_field->SetObject<false>(dex_file_obj.Get(), new_art_dex_array.Get());
-    // Invalidate existing methods.
-    InvalidateExistingMethods(self, klass, cache, new_dex_file.release());
-  }
-  threads->ResumeAll();
-  return OK;
-}
-
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index 85bcb00..35b990b 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -52,12 +52,6 @@
                                  /*out*/jint* data_len,
                                  /*out*/unsigned char** dex_data);
 
-// Install the new dex file.
-jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          std::string original_location,
-                                          jint data_len,
-                                          unsigned char* dex_data);
-
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_TRANSFORM_H_
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 56eab5e..e1022b0 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -300,6 +300,8 @@
       .Define("-Xplugin:_")
           .WithType<std::vector<Plugin>>().AppendValues()
           .IntoKey(M::Plugins)
+      .Define("-Xfully-deoptable")
+          .IntoKey(M::FullyDeoptable)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/plugin.h b/runtime/plugin.h
index 18f3977..f077aaf 100644
--- a/runtime/plugin.h
+++ b/runtime/plugin.h
@@ -34,7 +34,7 @@
 // single-threaded fashion so not much need
 class Plugin {
  public:
-  static Plugin Create(std::string lib) {
+  static Plugin Create(const std::string& lib) {
     return Plugin(lib);
   }
 
@@ -66,7 +66,7 @@
   }
 
  private:
-  explicit Plugin(std::string library) : library_(library), dlopen_handle_(nullptr) { }
+  explicit Plugin(const std::string& library) : library_(library), dlopen_handle_(nullptr) { }
 
   std::string library_;
   void* dlopen_handle_;
diff --git a/runtime/primitive.cc b/runtime/primitive.cc
index d29a060..2380284 100644
--- a/runtime/primitive.cc
+++ b/runtime/primitive.cc
@@ -31,11 +31,35 @@
   "PrimVoid",
 };
 
+static const char* kBoxedDescriptors[] = {  // Indexed by Primitive::Type.
+  "Ljava/lang/Object;",
+  "Ljava/lang/Boolean;",
+  "Ljava/lang/Byte;",
+  "Ljava/lang/Character;",
+  "Ljava/lang/Short;",
+  "Ljava/lang/Integer;",
+  "Ljava/lang/Long;",
+  "Ljava/lang/Float;",
+  "Ljava/lang/Double;",
+  "Ljava/lang/Void;",
+};
+
+#define COUNT_OF(x) (sizeof(x) / sizeof(x[0]))
+
 const char* Primitive::PrettyDescriptor(Primitive::Type type) {
+  static_assert(COUNT_OF(kTypeNames) == static_cast<size_t>(Primitive::kPrimLast) + 1,
+                "Missing element");
   CHECK(Primitive::kPrimNot <= type && type <= Primitive::kPrimVoid) << static_cast<int>(type);
   return kTypeNames[type];
 }
 
+const char* Primitive::BoxedDescriptor(Primitive::Type type) {
+  static_assert(COUNT_OF(kBoxedDescriptors) == static_cast<size_t>(Primitive::kPrimLast) + 1,
+                "Missing element");
+  CHECK(Primitive::kPrimNot <= type && type <= Primitive::kPrimVoid) << static_cast<int>(type);
+  return kBoxedDescriptors[type];
+}
+
 std::ostream& operator<<(std::ostream& os, const Primitive::Type& type) {
   int32_t int_type = static_cast<int32_t>(type);
   if (type >= Primitive::kPrimNot && type <= Primitive::kPrimVoid) {
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 18f45ff..a0edaee 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -138,6 +138,9 @@
 
   static const char* PrettyDescriptor(Type type);
 
+  // Returns the descriptor corresponding to the boxed type of |type|.
+  static const char* BoxedDescriptor(Type type);
+
   static bool IsFloatingPointType(Type type) {
     return type == kPrimFloat || type == kPrimDouble;
   }
@@ -158,6 +161,35 @@
     }
   }
 
+  // Returns true if |type| is a numeric type.
+  static constexpr bool IsNumericType(Type type) {
+    switch (type) {
+      case Primitive::Type::kPrimNot: return false;
+      case Primitive::Type::kPrimBoolean: return false;
+      case Primitive::Type::kPrimByte: return true;
+      case Primitive::Type::kPrimChar: return false;
+      case Primitive::Type::kPrimShort: return true;
+      case Primitive::Type::kPrimInt: return true;
+      case Primitive::Type::kPrimLong: return true;
+      case Primitive::Type::kPrimFloat: return true;
+      case Primitive::Type::kPrimDouble: return true;
+      case Primitive::Type::kPrimVoid: return false;
+    }
+  }
+
+  // Returns true if it is possible to widen type |from| to type |to|. Both |from| and
+  // |to| should be numeric primitive types.
+  static bool IsWidenable(Type from, Type to) {
+    static_assert(Primitive::Type::kPrimByte < Primitive::Type::kPrimShort, "Bad ordering");
+    static_assert(Primitive::Type::kPrimShort < Primitive::Type::kPrimInt, "Bad ordering");
+    static_assert(Primitive::Type::kPrimInt < Primitive::Type::kPrimLong, "Bad ordering");
+    static_assert(Primitive::Type::kPrimLong < Primitive::Type::kPrimFloat, "Bad ordering");
+    static_assert(Primitive::Type::kPrimFloat < Primitive::Type::kPrimDouble, "Bad ordering");
+    // Widening is only applicable between numeric types, like byte
+    // and int. Non-numeric types, such as boolean, cannot be widened.
+    return IsNumericType(from) && IsNumericType(to) && from <= to;
+  }
+
   static bool IsIntOrLongType(Type type) {
     return type == kPrimInt || type == kPrimLong;
   }
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 32a5582..fd7e56d 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -199,8 +199,6 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
   StackHandleScope<7> hs(soa.Self());
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
 
   Handle<mirror::Class> proxyClass0;
   Handle<mirror::Class> proxyClass1;
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index be95600..37cf257 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -40,14 +40,16 @@
       }
     }
     if (kUseBakerReadBarrier) {
-      // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
-      // is used to create artificial data dependency from the is_gray
-      // load to the ref field (ptr) load to avoid needing a load-load
-      // barrier between the two.
-      uintptr_t rb_ptr_high_bits;
-      bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+      // fake_address_dependency (must be zero) is used to create an artificial data dependency
+      // from the is_gray load to the ref field (ptr) load to avoid needing a load-load barrier
+      // between the two.
+      uintptr_t fake_address_dependency;
+      bool is_gray = IsGray(obj, &fake_address_dependency);
+      if (kEnableReadBarrierInvariantChecks) {
+        CHECK_EQ(fake_address_dependency, 0U) << obj << " rb_state=" << obj->GetReadBarrierState();
+      }
       ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
-          rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+          fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr));
       MirrorType* ref = ref_addr->AsMirrorPtr();
       MirrorType* old_ref = ref;
       if (is_gray) {
@@ -60,9 +62,6 @@
               offset, old_ref, ref);
         }
       }
-      if (kEnableReadBarrierInvariantChecks) {
-        CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
-      }
       AssertToSpaceInvariant(obj, offset, ref);
       return ref;
     } else if (kUseBrooksReadBarrier) {
@@ -223,20 +222,14 @@
   return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
 }
 
-inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
-                                                   uintptr_t* out_rb_ptr_high_bits) {
-  mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
-  uintptr_t rb_ptr_bits = reinterpret_cast<uintptr_t>(rb_ptr);
-  uintptr_t rb_ptr_low_bits = rb_ptr_bits & rb_ptr_mask_;
-  if (kEnableReadBarrierInvariantChecks) {
-    CHECK(rb_ptr_low_bits == white_ptr_ || rb_ptr_low_bits == gray_ptr_ ||
-          rb_ptr_low_bits == black_ptr_)
-        << "obj=" << obj << " rb_ptr=" << rb_ptr << " " << obj->PrettyTypeOf();
-  }
-  bool is_gray = rb_ptr_low_bits == gray_ptr_;
-  // The high bits are supposed to be zero. We check this on the caller side.
-  *out_rb_ptr_high_bits = rb_ptr_bits & ~rb_ptr_mask_;
-  return is_gray;
+inline bool ReadBarrier::IsGray(mirror::Object* obj, uintptr_t* fake_address_dependency) {
+  return obj->GetReadBarrierState(fake_address_dependency) == gray_state_;
+}
+
+inline bool ReadBarrier::IsGray(mirror::Object* obj) {
+  // Use a load-acquire to load the read barrier bit to avoid reordering with the subsequent load.
+  // GetReadBarrierStateAcquire() has load-acquire semantics.
+  return obj->GetReadBarrierStateAcquire() == gray_state_;
 }
 
 }  // namespace art
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index a861861..cbc2697 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -82,26 +82,32 @@
   // ALWAYS_INLINE on this caused a performance regression b/26744236.
   static mirror::Object* Mark(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static mirror::Object* WhitePtr() {
-    return reinterpret_cast<mirror::Object*>(white_ptr_);
+  static constexpr uint32_t WhiteState() {
+    return white_state_;
   }
-  static mirror::Object* GrayPtr() {
-    return reinterpret_cast<mirror::Object*>(gray_ptr_);
-  }
-  static mirror::Object* BlackPtr() {
-    return reinterpret_cast<mirror::Object*>(black_ptr_);
+  static constexpr uint32_t GrayState() {
+    return gray_state_;
   }
 
-  ALWAYS_INLINE static bool HasGrayReadBarrierPointer(mirror::Object* obj,
-                                                      uintptr_t* out_rb_ptr_high_bits)
+  // Sets *fake_address_dependency to zero. The caller should bitwise-or that value into the
+  // address of the subsequent object reference load (from one of `obj`'s fields) so that the
+  // load carries an artificial address dependency on the read barrier bit load and cannot be
+  // reordered before it.
+  ALWAYS_INLINE static bool IsGray(mirror::Object* obj, uintptr_t* fake_address_dependency)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
-  static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
-  static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
-  // TODO: black_ptr_ is unused, we should remove it.
-  static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
-  static constexpr uintptr_t rb_ptr_mask_ = 0x1;  // The low bits for white|gray.
+  // This uses a load-acquire to load the read barrier bit internally to prevent the reordering of
+  // the read barrier bit load and the subsequent load.
+  ALWAYS_INLINE static bool IsGray(mirror::Object* obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static bool IsValidReadBarrierState(uint32_t rb_state) {
+    return rb_state == white_state_ || rb_state == gray_state_;
+  }
+
+  static constexpr uint32_t white_state_ = 0x0;    // Not marked.
+  static constexpr uint32_t gray_state_ = 0x1;     // Marked, but not marked through. On mark stack.
+  static constexpr uint32_t rb_state_mask_ = 0x1;  // The low bits for white|gray.
 };
 
 }  // namespace art
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 16ed7fb..1c975a4 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -215,33 +215,87 @@
   }
   std::sort(sorted_entries.begin(), sorted_entries.end(), GcRootComparator());
 
+  class SummaryElement {
+   public:
+    GcRoot<mirror::Object> root;
+    size_t equiv;      // Further entries with the same class and element count as this one.
+    size_t identical;  // Further entries that are the very same reference as this one.
+
+    SummaryElement() : equiv(0), identical(0) {}
+    SummaryElement(SummaryElement&& ref) {
+      root = ref.root;
+      equiv = ref.equiv;
+      identical = ref.identical;
+    }
+    SummaryElement(const SummaryElement&) = default;
+    SummaryElement& operator=(SummaryElement&&) = default;
+
+    void Reset(GcRoot<mirror::Object>& _root) {  // Start a new group at _root, counters zeroed.
+      root = _root;
+      equiv = 0;
+      identical = 0;
+    }
+  };
+  std::vector<SummaryElement> sorted_summaries;
+  {
+    SummaryElement prev;
+
+    for (GcRoot<mirror::Object>& root : sorted_entries) {
+      ObjPtr<mirror::Object> current = root.Read<kWithoutReadBarrier>();
+
+      if (UNLIKELY(prev.root.IsNull())) {
+        prev.Reset(root);
+        continue;
+      }
+
+      ObjPtr<mirror::Object> prevObj = prev.root.Read<kWithoutReadBarrier>();
+      if (current == prevObj) {
+        // Same reference, added more than once.
+        ++prev.identical;
+      } else if (current->GetClass() == prevObj->GetClass() &&
+          GetElementCount(current) == GetElementCount(prevObj)) {
+        // Same class / element count, different object.
+        ++prev.equiv;
+      } else {
+        sorted_summaries.push_back(prev);
+        prev.Reset(root);
+      }
+      prev.root = root;
+    }
+    sorted_summaries.push_back(prev);
+
+    // Compare summary elements, first by combined count, then by identical (indicating leaks),
+    // then by class (and size and address).
+    struct SummaryElementComparator {
+      GcRootComparator gc_root_cmp;
+
+      bool operator()(SummaryElement& elem1, SummaryElement& elem2) const
+          NO_THREAD_SAFETY_ANALYSIS {
+        Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+
+        size_t count1 = elem1.equiv + elem1.identical;
+        size_t count2 = elem2.equiv + elem2.identical;
+        if (count1 != count2) {
+          return count1 > count2;
+        }
+
+        if (elem1.identical != elem2.identical) {
+          return elem1.identical > elem2.identical;
+        }
+
+        // Otherwise, compare the GC roots as before.
+        return gc_root_cmp(elem1.root, elem2.root);
+      }
+    };
+    std::sort(sorted_summaries.begin(), sorted_summaries.end(), SummaryElementComparator());
+  }
+
   // Dump a summary of the whole table.
   os << "  Summary:\n";
-  size_t equiv = 0;
-  size_t identical = 0;
-  ObjPtr<mirror::Object> prev = nullptr;
-  for (GcRoot<mirror::Object>& root : sorted_entries) {
-    ObjPtr<mirror::Object> current = root.Read<kWithoutReadBarrier>();
-    if (prev != nullptr) {
-      const size_t element_count = GetElementCount(prev);
-      if (current == prev) {
-        // Same reference, added more than once.
-        ++identical;
-      } else if (current->GetClass() == prev->GetClass() &&
-          GetElementCount(current) == element_count) {
-        // Same class / element count, different object.
-        ++equiv;
-      } else {
-        // Different class.
-        DumpSummaryLine(os, prev, element_count, identical, equiv);
-        equiv = 0;
-        identical = 0;
-      }
-    }
-    prev = current;
+  for (SummaryElement& elem : sorted_summaries) {
+    ObjPtr<mirror::Object> elemObj = elem.root.Read<kWithoutReadBarrier>();
+    DumpSummaryLine(os, elemObj, GetElementCount(elemObj), elem.identical, elem.equiv);
   }
-  // Handle the last entry.
-  DumpSummaryLine(os, prev, GetElementCount(prev), identical, equiv);
 }
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index 489db9a..d80a9b3 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -166,4 +166,77 @@
   }
 }
 
+static std::vector<size_t> FindAll(const std::string& haystack, const char* needle) {
+  std::vector<size_t> res;  // Start offsets of every match, in ascending order.
+  size_t start = 0;
+  do {
+    size_t pos = haystack.find(needle, start);
+    if (pos == std::string::npos) {
+      break;  // No further occurrences.
+    }
+    res.push_back(pos);
+    start = pos + 1;  // Resume one past the match start, so overlapping matches are found too.
+  } while (start < haystack.size());
+  return res;
+}
+
+TEST_F(ReferenceTableTest, SummaryOrder) {
+  // Check that the summary statistics are sorted.
+  ScopedObjectAccess soa(Thread::Current());
+
+  ReferenceTable rt("test", 0, 20);
+
+  {
+    mirror::Object* s1 = mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello");
+    mirror::Object* s2 = mirror::String::AllocFromModifiedUtf8(soa.Self(), "world");
+
+    // 3 copies of s1, 2 copies of s2, interleaved.
+    for (size_t i = 0; i != 2; ++i) {
+      rt.Add(s1);
+      rt.Add(s2);
+    }
+    rt.Add(s1);
+  }
+
+  {
+    // Differently sized byte arrays. Should be sorted by identical (non-unique count).
+    mirror::Object* b1_1 = mirror::ByteArray::Alloc(soa.Self(), 1);
+    rt.Add(b1_1);
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 2));
+    rt.Add(b1_1);
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 2));
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 1));
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 2));
+  }
+
+  rt.Add(mirror::CharArray::Alloc(soa.Self(), 0));
+
+  // Now dump, and ensure order.
+  std::ostringstream oss;
+  rt.Dump(oss);
+
+  // Only do this on the part after Summary.
+  std::string base = oss.str();
+  size_t summary_pos = base.find("Summary:");
+  ASSERT_NE(summary_pos, std::string::npos);
+
+  std::string haystack = base.substr(summary_pos);
+
+  std::vector<size_t> strCounts = FindAll(haystack, "java.lang.String");
+  std::vector<size_t> b1Counts = FindAll(haystack, "byte[] (1 elements)");
+  std::vector<size_t> b2Counts = FindAll(haystack, "byte[] (2 elements)");
+  std::vector<size_t> cCounts = FindAll(haystack, "char[]");
+
+  // Only one each.
+  EXPECT_EQ(1u, strCounts.size());
+  EXPECT_EQ(1u, b1Counts.size());
+  EXPECT_EQ(1u, b2Counts.size());
+  EXPECT_EQ(1u, cCounts.size());
+
+  // Expect them to be in order.
+  EXPECT_LT(strCounts[0], b1Counts[0]);
+  EXPECT_LT(b1Counts[0], b2Counts[0]);
+  EXPECT_LT(b2Counts[0], cCounts[0]);
+}
+
 }  // namespace art
diff --git a/runtime/reflection-inl.h b/runtime/reflection-inl.h
index c4d4fae..68e7a10 100644
--- a/runtime/reflection-inl.h
+++ b/runtime/reflection-inl.h
@@ -29,11 +29,10 @@
 
 namespace art {
 
-inline bool ConvertPrimitiveValue(bool unbox_for_result,
-                                  Primitive::Type srcType,
-                                  Primitive::Type dstType,
-                                  const JValue& src,
-                                  JValue* dst) {
+inline bool ConvertPrimitiveValueNoThrow(Primitive::Type srcType,
+                                         Primitive::Type dstType,
+                                         const JValue& src,
+                                         JValue* dst) {
   DCHECK(srcType != Primitive::kPrimNot && dstType != Primitive::kPrimNot);
   if (LIKELY(srcType == dstType)) {
     dst->SetJ(src.GetJ());
@@ -91,6 +90,18 @@
   default:
     break;
   }
+  return false;
+}
+
+inline bool ConvertPrimitiveValue(bool unbox_for_result,
+                                  Primitive::Type srcType,
+                                  Primitive::Type dstType,
+                                  const JValue& src,
+                                  JValue* dst) {
+  if (ConvertPrimitiveValueNoThrow(srcType, dstType, src, dst)) {
+    return true;
+  }
+
   if (!unbox_for_result) {
     ThrowIllegalArgumentException(StringPrintf("Invalid primitive conversion from %s to %s",
                                                PrettyDescriptor(srcType).c_str(),
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index f88309b..8446b52 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -363,7 +363,7 @@
   Thread* const self = Thread::Current();
   PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   for (uint32_t i = 0; i < num_params; i++) {
-    uint16_t type_idx = params->GetTypeItem(i).type_idx_;
+    dex::TypeIndex type_idx = params->GetTypeItem(i).type_idx_;
     ObjPtr<mirror::Class> param_type(m->GetClassFromTypeIndex(type_idx,
                                                               true /* resolve*/,
                                                               pointer_size));
@@ -453,7 +453,7 @@
     return JValue();
   }
 
-  ArtMethod* method = soa.DecodeMethod(mid);
+  ArtMethod* method = jni::DecodeArtMethod(mid);
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -484,7 +484,7 @@
     return JValue();
   }
 
-  ArtMethod* method = soa.DecodeMethod(mid);
+  ArtMethod* method = jni::DecodeArtMethod(mid);
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -516,7 +516,7 @@
   }
 
   ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj);
-  ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
+  ArtMethod* method = FindVirtualMethod(receiver, jni::DecodeArtMethod(mid));
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -548,7 +548,7 @@
   }
 
   ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj);
-  ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
+  ArtMethod* method = FindVirtualMethod(receiver, jni::DecodeArtMethod(mid));
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -739,8 +739,11 @@
     arg_array.Append(value.GetI());
   }
 
-  soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
-                              &result, shorty);
+  jni::DecodeArtMethod(m)->Invoke(soa.Self(),
+                                  arg_array.GetArray(),
+                                  arg_array.GetNumBytes(),
+                                  &result,
+                                  shorty);
   return result.GetL();
 }
 
diff --git a/runtime/reflection.h b/runtime/reflection.h
index 6e5ef71..f2652fd 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -47,6 +47,12 @@
                              JValue* unboxed_value)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+ALWAYS_INLINE bool ConvertPrimitiveValueNoThrow(Primitive::Type src_class,
+                                                Primitive::Type dst_class,
+                                                const JValue& src,
+                                                JValue* dst)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
 ALWAYS_INLINE bool ConvertPrimitiveValue(bool unbox_for_result,
                                          Primitive::Type src_class,
                                          Primitive::Type dst_class,
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 22076bb..e254dfe 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "common_compiler_test.h"
+#include "jni_internal.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
@@ -136,7 +137,7 @@
     ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "nop", "()V");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
-    InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), nullptr);
+    InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), nullptr);
   }
 
   void InvokeIdentityByteMethod(bool is_static) {
@@ -148,20 +149,20 @@
     jvalue args[1];
 
     args[0].b = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetB());
 
     args[0].b = -1;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetB());
 
     args[0].b = SCHAR_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
     static_assert(SCHAR_MIN == -128, "SCHAR_MIN unexpected");
     args[0].b = SCHAR_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
@@ -174,19 +175,19 @@
     jvalue args[1];
 
     args[0].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = -1;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetI());
 
     args[0].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(INT_MAX, result.GetI());
 
     args[0].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
@@ -199,19 +200,19 @@
     jvalue args[1];
 
     args[0].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = -1.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-1.0, result.GetD());
 
     args[0].d = DBL_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(DBL_MAX, result.GetD());
 
     args[0].d = DBL_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(DBL_MIN, result.GetD());
   }
 
@@ -225,22 +226,22 @@
 
     args[0].i = 1;
     args[1].i = 2;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(3, result.GetI());
 
     args[0].i = -2;
     args[1].i = 5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(3, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
   }
 
@@ -255,31 +256,31 @@
     args[0].i = 0;
     args[1].i = 0;
     args[2].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
     args[1].i = 2;
     args[2].i = 3;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(6, result.GetI());
 
     args[0].i = -1;
     args[1].i = 2;
     args[2].i = -3;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
     args[2].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483646, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
     args[2].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483645, result.GetI());
   }
 
@@ -295,35 +296,35 @@
     args[1].i = 0;
     args[2].i = 0;
     args[3].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
     args[1].i = 2;
     args[2].i = 3;
     args[3].i = 4;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(10, result.GetI());
 
     args[0].i = -1;
     args[1].i = 2;
     args[2].i = -3;
     args[3].i = 4;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
     args[2].i = INT_MAX;
     args[3].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
     args[2].i = INT_MAX;
     args[3].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-4, result.GetI());
   }
 
@@ -340,7 +341,7 @@
     args[2].i = 0;
     args[3].i = 0;
     args[4].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
@@ -348,7 +349,7 @@
     args[2].i = 3;
     args[3].i = 4;
     args[4].i = 5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(15, result.GetI());
 
     args[0].i = -1;
@@ -356,7 +357,7 @@
     args[2].i = -3;
     args[3].i = 4;
     args[4].i = -5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-3, result.GetI());
 
     args[0].i = INT_MAX;
@@ -364,7 +365,7 @@
     args[2].i = INT_MAX;
     args[3].i = INT_MIN;
     args[4].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483645, result.GetI());
 
     args[0].i = INT_MAX;
@@ -372,7 +373,7 @@
     args[2].i = INT_MAX;
     args[3].i = INT_MAX;
     args[4].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483643, result.GetI());
   }
 
@@ -386,27 +387,27 @@
 
     args[0].d = 0.0;
     args[1].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(3.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-1.0, result.GetD());
 
     args[0].d = DBL_MAX;
     args[1].d = DBL_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(1.7976931348623157e308, result.GetD());
 
     args[0].d = DBL_MAX;
     args[1].d = DBL_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(INFINITY, result.GetD());
   }
 
@@ -421,19 +422,19 @@
     args[0].d = 0.0;
     args[1].d = 0.0;
     args[2].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
     args[2].d = 3.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(6.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
     args[2].d = 3.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(2.0, result.GetD());
   }
 
@@ -449,21 +450,21 @@
     args[1].d = 0.0;
     args[2].d = 0.0;
     args[3].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
     args[2].d = 3.0;
     args[3].d = 4.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(10.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
     args[2].d = 3.0;
     args[3].d = -4.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-2.0, result.GetD());
   }
 
@@ -480,7 +481,7 @@
     args[2].d = 0.0;
     args[3].d = 0.0;
     args[4].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
@@ -488,7 +489,7 @@
     args[2].d = 3.0;
     args[3].d = 4.0;
     args[4].d = 5.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(15.0, result.GetD());
 
     args[0].d = 1.0;
@@ -496,7 +497,7 @@
     args[2].d = 3.0;
     args[3].d = -4.0;
     args[4].d = 5.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(3.0, result.GetD());
   }
 
@@ -531,7 +532,7 @@
 
   jvalue args[1];
   args[0].l = nullptr;
-  InvokeWithJValues(soa, nullptr, soa.EncodeMethod(method), args);
+  InvokeWithJValues(soa, nullptr, jni::EncodeArtMethod(method), args);
 }
 
 TEST_F(ReflectionTest, StaticNopMethod) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index d645c5a..92e00ec 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -62,6 +62,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
+#include "cha.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
 #include "debugger.h"
@@ -81,10 +82,12 @@
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/emulated_stack_frame.h"
 #include "mirror/field.h"
@@ -238,6 +241,7 @@
       force_native_bridge_(false),
       is_native_bridge_loaded_(false),
       is_native_debuggable_(false),
+      is_fully_deoptable_(false),
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
       oat_file_manager_(nullptr),
@@ -346,6 +350,7 @@
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
+  delete cha_;
   delete heap_;
   delete intern_table_;
   delete oat_file_manager_;
@@ -370,6 +375,7 @@
   void Dump(std::ostream& os) const {
     if (gAborting > 1) {
       os << "Runtime aborting --- recursively, so no thread-specific detail!\n";
+      DumpRecursiveAbort(os);
       return;
     }
     gAborting++;
@@ -426,6 +432,21 @@
       }
     }
   }
+
+  // For recursive aborts.
+  void DumpRecursiveAbort(std::ostream& os) const NO_THREAD_SAFETY_ANALYSIS {
+    // The only thing we attempt here is dumping the native stack of the current thread, and only
+    // while gAborting is still below an arbitrary limit, so that we eventually stop dumping and
+    // actually die.
+    // Note: as we're using a global counter for the recursive abort detection, there is a potential
+    //       race here and it is not OK to just print when the counter is "2" (one from
+    //       Runtime::Abort(), one from previous Dump() call). Use a number that seems large enough.
+    static constexpr size_t kOnlyPrintWhenRecursionLessThan = 100u;
+    if (gAborting < kOnlyPrintWhenRecursionLessThan) {
+      gAborting++;
+      DumpNativeStack(os, GetTid());
+    }
+  }
 };
 
 void Runtime::Abort(const char* msg) {
@@ -440,8 +461,16 @@
 
   // Many people have difficulty distinguish aborts from crashes,
   // so be explicit.
+  // Note: use cerr on the host to print log lines immediately, so we get at least some output
+  //       in case of recursive aborts. We lose annotation with the source file and line number
+  //       here, which is a minor issue. The same is significantly more complicated on device,
+  //       which is why we ignore the issue there.
   AbortState state;
-  LOG(FATAL_WITHOUT_ABORT) << Dumpable<AbortState>(state);
+  if (kIsTargetBuild) {
+    LOG(FATAL_WITHOUT_ABORT) << Dumpable<AbortState>(state);
+  } else {
+    std::cerr << Dumpable<AbortState>(state);
+  }
 
   // Sometimes we dump long messages, and the Android abort message only retains the first line.
   // In those cases, just log the message again, to avoid logcat limits.
@@ -490,6 +519,14 @@
   GetMonitorList()->SweepMonitorList(visitor);
   GetJavaVM()->SweepJniWeakGlobals(visitor);
   GetHeap()->SweepAllocationRecords(visitor);
+  if (GetJit() != nullptr) {
+    // Visit JIT literal tables. Objects in these tables are classes and strings
+    // and only classes can be affected by class unloading. The strings always
+    // stay alive as they are strongly interned.
+    // TODO: Move this closer to CleanupClassLoaders, to avoid blocking weak accesses
+    // from mutators. See b/32167580.
+    GetJit()->GetCodeCache()->SweepRootTables(visitor);
+  }
 
   // All other generic system-weak holders.
   for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
@@ -558,7 +595,10 @@
       "getSystemClassLoader", "()Ljava/lang/ClassLoader;", pointer_size);
   CHECK(getSystemClassLoader != nullptr);
 
-  JValue result = InvokeWithJValues(soa, nullptr, soa.EncodeMethod(getSystemClassLoader), nullptr);
+  JValue result = InvokeWithJValues(soa,
+                                    nullptr,
+                                    jni::EncodeArtMethod(getSystemClassLoader),
+                                    nullptr);
   JNIEnv* env = soa.Self()->GetJniEnv();
   ScopedLocalRef<jobject> system_class_loader(env, soa.AddLocalReference<jobject>(result.GetL()));
   CHECK(system_class_loader.get() != nullptr);
@@ -760,6 +800,9 @@
 }
 
 bool Runtime::IsDebuggable() const {
+  if (IsFullyDeoptable()) {
+    return true;
+  }
   const OatFile* oat_file = GetOatFileManager().GetPrimaryOatFile();
   return oat_file != nullptr && oat_file->IsDebuggable();
 }
@@ -983,6 +1026,8 @@
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
+  is_fully_deoptable_ = runtime_options.Exists(Opt::FullyDeoptable);
+
   no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
   force_native_bridge_ = runtime_options.Exists(Opt::ForceNativeBridge);
 
@@ -1019,8 +1064,10 @@
                        runtime_options.GetOrDefault(Opt::NonMovingSpaceCapacity),
                        runtime_options.GetOrDefault(Opt::Image),
                        runtime_options.GetOrDefault(Opt::ImageInstructionSet),
-                       xgc_option.collector_type_,
-                       runtime_options.GetOrDefault(Opt::BackgroundGc),
+                       // Override the collector type to CC when the read barrier is configured.
+                       kUseReadBarrier ? gc::kCollectorTypeCC : xgc_option.collector_type_,
+                       kUseReadBarrier ? BackgroundGcOption(gc::kCollectorTypeCCBackground)
+                                       : runtime_options.GetOrDefault(Opt::BackgroundGc),
                        runtime_options.GetOrDefault(Opt::LargeObjectSpace),
                        runtime_options.GetOrDefault(Opt::LargeObjectThreshold),
                        runtime_options.GetOrDefault(Opt::ParallelGCThreads),
@@ -1158,6 +1205,7 @@
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
+  cha_ = new ClassHierarchyAnalysis;
   if (GetHeap()->HasBootImageSpace()) {
     bool result = class_linker_->InitFromBootImage(&error_msg);
     if (!result) {
@@ -1595,6 +1643,7 @@
   mirror::MethodType::VisitRoots(visitor);
   mirror::MethodHandleImpl::VisitRoots(visitor);
   mirror::EmulatedStackFrame::VisitRoots(visitor);
+  mirror::ClassExt::VisitRoots(visitor);
   // Visit all the primitive array types classes.
   mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor);   // BooleanArray
   mirror::PrimitiveArray<int8_t>::VisitRoots(visitor);    // ByteArray
@@ -1736,6 +1785,9 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNoReadsOrWrites);
   java_vm_->DisallowNewWeakGlobals();
   heap_->DisallowNewAllocationRecords();
+  if (GetJit() != nullptr) {
+    GetJit()->GetCodeCache()->DisallowInlineCacheAccess();
+  }
 
   // All other generic system-weak holders.
   for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
@@ -1749,6 +1801,9 @@
   intern_table_->ChangeWeakRootState(gc::kWeakRootStateNormal);  // TODO: Do this in the sweeping.
   java_vm_->AllowNewWeakGlobals();
   heap_->AllowNewAllocationRecords();
+  if (GetJit() != nullptr) {
+    GetJit()->GetCodeCache()->AllowInlineCacheAccess();
+  }
 
   // All other generic system-weak holders.
   for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
@@ -1756,18 +1811,21 @@
   }
 }
 
-void Runtime::BroadcastForNewSystemWeaks() {
+void Runtime::BroadcastForNewSystemWeaks(bool broadcast_for_checkpoint) {
   // This is used for the read barrier case that uses the thread-local
-  // Thread::GetWeakRefAccessEnabled() flag.
-  CHECK(kUseReadBarrier);
+  // Thread::GetWeakRefAccessEnabled() flag and the checkpoint while weak ref access is disabled
+  // (see ThreadList::RunCheckpoint).
   monitor_list_->BroadcastForNewMonitors();
   intern_table_->BroadcastForNewInterns();
   java_vm_->BroadcastForNewWeakGlobals();
   heap_->BroadcastForNewAllocationRecords();
+  if (GetJit() != nullptr) {
+    GetJit()->GetCodeCache()->BroadcastForInlineCacheAccess();
+  }
 
   // All other generic system-weak holders.
   for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
-    holder->Broadcast();
+    holder->Broadcast(broadcast_for_checkpoint);
   }
 }
 
@@ -1953,31 +2011,32 @@
   preinitialization_transaction_->RecordWriteArray(array, index, value);
 }
 
-void Runtime::RecordStrongStringInsertion(mirror::String* s) const {
+void Runtime::RecordStrongStringInsertion(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordStrongStringInsertion(s);
 }
 
-void Runtime::RecordWeakStringInsertion(mirror::String* s) const {
+void Runtime::RecordWeakStringInsertion(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWeakStringInsertion(s);
 }
 
-void Runtime::RecordStrongStringRemoval(mirror::String* s) const {
+void Runtime::RecordStrongStringRemoval(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordStrongStringRemoval(s);
 }
 
-void Runtime::RecordWeakStringRemoval(mirror::String* s) const {
+void Runtime::RecordWeakStringRemoval(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWeakStringRemoval(s);
 }
 
-void Runtime::RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx) const {
+void Runtime::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache,
+                                  dex::StringIndex string_idx) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordResolveString(dex_cache, string_idx);
@@ -2137,7 +2196,7 @@
 
 NO_RETURN
 void Runtime::Aborter(const char* abort_message) {
-#ifdef __ANDROID__
+#ifdef ART_TARGET_ANDROID
   android_set_abort_message(abort_message);
 #endif
   Runtime::Abort(abort_message);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 043ff5d..e6b3128 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -28,6 +28,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/macros.h"
+#include "dex_file_types.h"
 #include "experimental_flags.h"
 #include "gc_root.h"
 #include "instrumentation.h"
@@ -75,6 +76,7 @@
 }  // namespace verifier
 class ArenaPool;
 class ArtMethod;
+class ClassHierarchyAnalysis;
 class ClassLinker;
 class Closure;
 class CompilerCallbacks;
@@ -107,9 +109,7 @@
   kVisitRootFlagStartLoggingNewRoots = 0x4,
   kVisitRootFlagStopLoggingNewRoots = 0x8,
   kVisitRootFlagClearRootLog = 0x10,
-  // Non moving means we can have optimizations where we don't visit some roots if they are
-  // definitely reachable from another location. E.g. ArtMethod and ArtField roots.
-  kVisitRootFlagNonMoving = 0x20,
+  kVisitRootFlagClassLoader = 0x20,
 };
 
 class Runtime {
@@ -182,7 +182,7 @@
     return compiler_options_;
   }
 
-  void AddCompilerOption(std::string option) {
+  void AddCompilerOption(const std::string& option) {
     compiler_options_.push_back(option);
   }
 
@@ -316,17 +316,20 @@
   }
 
   bool IsMethodHandlesEnabled() const {
-    // return experimental_flags_ & ExperimentalFlags::kMethodHandles;
-    return true;
+    return experimental_flags_ & ExperimentalFlags::kMethodHandles;
   }
 
   void DisallowNewSystemWeaks() REQUIRES_SHARED(Locks::mutator_lock_);
   void AllowNewSystemWeaks() REQUIRES_SHARED(Locks::mutator_lock_);
-  void BroadcastForNewSystemWeaks() REQUIRES_SHARED(Locks::mutator_lock_);
+  // broadcast_for_checkpoint is true when we broadcast to make blocking threads respond to
+  // checkpoint requests. It's false when we broadcast to unblock blocking threads after system weak
+  // access is reenabled.
+  void BroadcastForNewSystemWeaks(bool broadcast_for_checkpoint = false);
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit image roots, only used for hprof since the GC uses the image space mod union table
@@ -336,6 +339,7 @@
   // Visit all of the roots we can do safely do concurrently.
   void VisitConcurrentRoots(RootVisitor* visitor,
                             VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
@@ -510,15 +514,15 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   void RecordWriteArray(mirror::Array* array, size_t index, uint64_t value) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void RecordStrongStringInsertion(mirror::String* s) const
+  void RecordStrongStringInsertion(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordWeakStringInsertion(mirror::String* s) const
+  void RecordWeakStringInsertion(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordStrongStringRemoval(mirror::String* s) const
+  void RecordStrongStringRemoval(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordWeakStringRemoval(mirror::String* s) const
+  void RecordWeakStringRemoval(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx) const
+  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, dex::StringIndex string_idx) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetFaultMessage(const std::string& message) REQUIRES(!fault_message_lock_);
@@ -590,6 +594,14 @@
 
   bool IsDebuggable() const;
 
+  bool IsFullyDeoptable() const {
+    return is_fully_deoptable_;
+  }
+
+  void SetFullyDeoptable(bool value) {
+    is_fully_deoptable_ = value;
+  }
+
   bool IsNativeDebuggable() const {
     return is_native_debuggable_;
   }
@@ -663,6 +675,10 @@
   void AddSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
   void RemoveSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
 
+  ClassHierarchyAnalysis* GetClassHierarchyAnalysis() {
+    return cha_;
+  }
+
   NO_RETURN
   static void Aborter(const char* abort_message);
 
@@ -855,6 +871,9 @@
   // Whether we are running under native debugger.
   bool is_native_debuggable_;
 
+  // Whether we are expected to be deoptable at all points.
+  bool is_fully_deoptable_;
+
   // The maximum number of failed boots we allow before pruning the dalvik cache
   // and trying again. This option is only inspected when we're running as a
   // zygote.
@@ -907,6 +926,8 @@
   // Generic system-weak holders.
   std::vector<gc::AbstractSystemWeakHolder*> system_weak_holders_;
 
+  ClassHierarchyAnalysis* cha_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index b01a570..d1970fe 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -121,6 +121,7 @@
 RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>, Requires -Xexperimental:agents
 RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>, Requires -Xexperimental:agents
 RUNTIME_OPTIONS_KEY (std::vector<Plugin>,            Plugins)  // -Xplugin:<library> Requires -Xexperimental:runtime-plugins
+RUNTIME_OPTIONS_KEY (Unit,                           FullyDeoptable)  // -Xfully-deoptable
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index bde23c8..d4469f4 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -86,30 +86,6 @@
   return ObjPtr<T, kPoison>::DownCast(Self()->DecodeJObject(obj));
 }
 
-inline ArtField* ScopedObjectAccessAlreadyRunnable::DecodeField(jfieldID fid) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<ArtField*>(fid);
-}
-
-inline jfieldID ScopedObjectAccessAlreadyRunnable::EncodeField(ArtField* field) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<jfieldID>(field);
-}
-
-inline ArtMethod* ScopedObjectAccessAlreadyRunnable::DecodeMethod(jmethodID mid) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<ArtMethod*>(mid);
-}
-
-inline jmethodID ScopedObjectAccessAlreadyRunnable::EncodeMethod(ArtMethod* method) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<jmethodID>(method);
-}
-
 inline bool ScopedObjectAccessAlreadyRunnable::IsRunnable() const {
   return self_->GetState() == kRunnable;
 }
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 04fd914..b499258 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -94,14 +94,6 @@
   template<typename T, bool kPoison = kIsDebugBuild>
   ObjPtr<T, kPoison> Decode(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtField* DecodeField(jfieldID fid) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  jfieldID EncodeField(ArtField* field) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  ArtMethod* DecodeMethod(jmethodID mid) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  jmethodID EncodeMethod(ArtMethod* method) const REQUIRES_SHARED(Locks::mutator_lock_);
-
   ALWAYS_INLINE bool IsRunnable() const;
 
  protected:
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 167a30b..f20aa20 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -648,7 +648,7 @@
     return;
   }
 
-  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
+  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->GetCodeSize();
   uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
   CHECK(code_start <= pc && pc <= (code_start + code_size))
       << method->PrettyMethod()
diff --git a/runtime/stack.h b/runtime/stack.h
index 8a446ec..d02e4b7 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -66,6 +66,11 @@
 struct ShadowFrameDeleter;
 using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
 
+// Size in bytes of the should_deoptimize flag on stack.
+// We just need 4 bytes for our purpose regardless of the architecture. Frame size
+// calculation will automatically do alignment for the final frame size.
+static constexpr size_t kShouldDeoptimizeFlagSize = 4;
+
 // Counting locks by storing object pointers into a vector. Duplicate entries mark recursive locks.
 // The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
 // thread roots).
@@ -469,14 +474,21 @@
   }
 };
 
-class JavaFrameRootInfo : public RootInfo {
+class JavaFrameRootInfo FINAL : public RootInfo {
  public:
   JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg)
      : RootInfo(kRootJavaFrame, thread_id), stack_visitor_(stack_visitor), vreg_(vreg) {
   }
-  virtual void Describe(std::ostream& os) const OVERRIDE
+  void Describe(std::ostream& os) const OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  size_t GetVReg() const {
+    return vreg_;
+  }
+  const StackVisitor* GetVisitor() const {
+    return stack_visitor_;
+  }
+
  private:
   const StackVisitor* const stack_visitor_;
   const size_t vreg_;
@@ -623,7 +635,7 @@
     return num_frames_;
   }
 
-  size_t GetFrameDepth() REQUIRES_SHARED(Locks::mutator_lock_) {
+  size_t GetFrameDepth() const REQUIRES_SHARED(Locks::mutator_lock_) {
     return cur_depth_;
   }
 
@@ -748,10 +760,6 @@
     return cur_shadow_frame_;
   }
 
-  bool IsCurrentFrameInInterpreter() const {
-    return cur_shadow_frame_ != nullptr;
-  }
-
   HandleScope* GetCurrentHandleScope(size_t pointer_size) const {
     ArtMethod** sp = GetCurrentQuickFrame();
     // Skip ArtMethod*; handle scope comes next;
diff --git a/runtime/string_reference.h b/runtime/string_reference.h
index c75c218..0fc06e6 100644
--- a/runtime/string_reference.h
+++ b/runtime/string_reference.h
@@ -21,20 +21,22 @@
 
 #include "base/logging.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "utf-inl.h"
 
 namespace art {
 
 // A string is located by its DexFile and the string_ids_ table index into that DexFile.
 struct StringReference {
-  StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
+  StringReference(const DexFile* file, dex::StringIndex index)
+      : dex_file(file), string_index(index) { }
 
   const char* GetStringData() const {
     return dex_file->GetStringData(dex_file->GetStringId(string_index));
   }
 
   const DexFile* dex_file;
-  uint32_t string_index;
+  dex::StringIndex string_index;
 };
 
 // Compare only the reference and not the string contents.
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 5fa9353..c92305f 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -72,6 +72,19 @@
       RunCheckpointFunction();
     } else if (ReadFlag(kSuspendRequest)) {
       FullSuspendCheck();
+    } else if (ReadFlag(kEmptyCheckpointRequest)) {
+      RunEmptyCheckpoint();
+    } else {
+      break;
+    }
+  }
+}
+
+inline void Thread::CheckEmptyCheckpoint() {
+  DCHECK_EQ(Thread::Current(), this);
+  for (;;) {
+    if (ReadFlag(kEmptyCheckpointRequest)) {
+      RunEmptyCheckpoint();
     } else {
       break;
     }
@@ -145,8 +158,13 @@
       RunCheckpointFunction();
       continue;
     }
+    if (UNLIKELY((old_state_and_flags.as_struct.flags & kEmptyCheckpointRequest) != 0)) {
+      RunEmptyCheckpoint();
+      continue;
+    }
     // Change the state but keep the current flags (kCheckpointRequest is clear).
     DCHECK_EQ((old_state_and_flags.as_struct.flags & kCheckpointRequest), 0);
+    DCHECK_EQ((old_state_and_flags.as_struct.flags & kEmptyCheckpointRequest), 0);
     new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags;
     new_state_and_flags.as_struct.state = new_state;
 
@@ -163,7 +181,8 @@
 inline void Thread::PassActiveSuspendBarriers() {
   while (true) {
     uint16_t current_flags = tls32_.state_and_flags.as_struct.flags;
-    if (LIKELY((current_flags & (kCheckpointRequest | kActiveSuspendBarrier)) == 0)) {
+    if (LIKELY((current_flags &
+                (kCheckpointRequest | kEmptyCheckpointRequest | kActiveSuspendBarrier)) == 0)) {
       break;
     } else if ((current_flags & kActiveSuspendBarrier) != 0) {
       PassActiveSuspendBarriers(this);
@@ -211,7 +230,8 @@
       }
     } else if ((old_state_and_flags.as_struct.flags & kActiveSuspendBarrier) != 0) {
       PassActiveSuspendBarriers(this);
-    } else if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
+    } else if ((old_state_and_flags.as_struct.flags &
+                (kCheckpointRequest | kEmptyCheckpointRequest)) != 0) {
       // Impossible
       LOG(FATAL) << "Transitioning to runnable with checkpoint flag, "
                  << " flags=" << old_state_and_flags.as_struct.flags
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3f7d086..1283cf0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -116,20 +116,32 @@
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints);
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking);
+
+void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) {
+  CHECK(kUseReadBarrier);
+  tls32_.is_gc_marking = is_marking;
+  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
+  ResetQuickAllocEntryPointsForThread(is_marking);
+}
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
   uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
-  uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
-      sizeof(tlsPtr_.quick_entrypoints));
+  uintptr_t* end = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
   InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints);
 }
 
-void Thread::ResetQuickAllocEntryPointsForThread() {
-  ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
+void Thread::ResetQuickAllocEntryPointsForThread(bool is_marking) {
+  if (kUseReadBarrier && kRuntimeISA != kX86_64) {
+    // Allocation entrypoint switching is currently only implemented for X86_64.
+    is_marking = true;
+  }
+  ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints, is_marking);
 }
 
 class DeoptimizationContextRecord {
@@ -410,9 +422,9 @@
     self->tlsPtr_.opeer = soa.Decode<mirror::Object>(self->tlsPtr_.jpeer).Ptr();
     self->GetJniEnv()->DeleteGlobalRef(self->tlsPtr_.jpeer);
     self->tlsPtr_.jpeer = nullptr;
-    self->SetThreadName(self->GetThreadName(soa)->ToModifiedUtf8().c_str());
+    self->SetThreadName(self->GetThreadName()->ToModifiedUtf8().c_str());
 
-    ArtField* priorityField = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority);
+    ArtField* priorityField = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority);
     self->SetNativePriority(priorityField->GetInt(self->tlsPtr_.opeer));
     Dbg::PostThreadStart(self);
 
@@ -430,7 +442,7 @@
 
 Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
@@ -562,7 +574,7 @@
   if (VLOG_IS_ON(threads)) {
     ScopedObjectAccess soa(env);
 
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
     ObjPtr<mirror::String> java_name =
         f->GetObject(soa.Decode<mirror::Object>(java_peer))->AsString();
     std::string thread_name;
@@ -823,7 +835,7 @@
 
   ScopedObjectAccess soa(self);
   StackHandleScope<1> hs(self);
-  MutableHandle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName(soa)));
+  MutableHandle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName()));
   if (peer_thread_name.Get() == nullptr) {
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
@@ -834,7 +846,7 @@
     } else {
       InitPeer<false>(soa, thread_is_daemon, thread_group, thread_name.get(), thread_priority);
     }
-    peer_thread_name.Assign(GetThreadName(soa));
+    peer_thread_name.Assign(GetThreadName());
   }
   // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
   if (peer_thread_name.Get() != nullptr) {
@@ -845,13 +857,13 @@
 template<bool kTransactionActive>
 void Thread::InitPeer(ScopedObjectAccess& soa, jboolean thread_is_daemon, jobject thread_group,
                       jobject thread_name, jint thread_priority) {
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_daemon)->
       SetBoolean<kTransactionActive>(tlsPtr_.opeer, thread_is_daemon);
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)->
       SetObject<kTransactionActive>(tlsPtr_.opeer, soa.Decode<mirror::Object>(thread_group));
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_name)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name)->
       SetObject<kTransactionActive>(tlsPtr_.opeer, soa.Decode<mirror::Object>(thread_name));
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority)->
       SetInt<kTransactionActive>(tlsPtr_.opeer, thread_priority);
 }
 
@@ -947,8 +959,8 @@
   DumpStack(os, dump_native_stack, backtrace_map);
 }
 
-mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+mirror::String* Thread::GetThreadName() const {
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
   if (tlsPtr_.opeer == nullptr) {
     return nullptr;
   }
@@ -1148,6 +1160,12 @@
   } while (!done);
 }
 
+void Thread::RunEmptyCheckpoint() {
+  DCHECK_EQ(Thread::Current(), this);
+  AtomicClearFlag(kEmptyCheckpointRequest);
+  Runtime::Current()->GetThreadList()->EmptyCheckpointBarrier()->Pass(this);
+}
+
 bool Thread::RequestCheckpoint(Closure* function) {
   union StateAndFlags old_state_and_flags;
   old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
@@ -1175,6 +1193,28 @@
   return success;
 }
 
+bool Thread::RequestEmptyCheckpoint() {
+  union StateAndFlags old_state_and_flags;
+  old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
+  if (old_state_and_flags.as_struct.state != kRunnable) {
+    // If it's not runnable, we don't need to do anything because it won't be in the middle of a
+    // heap access (e.g. the read barrier).
+    return false;
+  }
+
+  // We must be runnable to request a checkpoint.
+  DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
+  union StateAndFlags new_state_and_flags;
+  new_state_and_flags.as_int = old_state_and_flags.as_int;
+  new_state_and_flags.as_struct.flags |= kEmptyCheckpointRequest;
+  bool success = tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(
+      old_state_and_flags.as_int, new_state_and_flags.as_int);
+  if (success) {
+    TriggerSuspend();
+  }
+  return success;
+}
+
 class BarrierClosure : public Closure {
  public:
   explicit BarrierClosure(Closure* wrapped) : wrapped_(wrapped), barrier_(0) {}
@@ -1306,17 +1346,18 @@
   // cause ScopedObjectAccessUnchecked to deadlock.
   if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
     ScopedObjectAccessUnchecked soa(self);
-    priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)
+    priority = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority)
         ->GetInt(thread->tlsPtr_.opeer);
-    is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)
+    is_daemon = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_daemon)
         ->GetBoolean(thread->tlsPtr_.opeer);
 
     ObjPtr<mirror::Object> thread_group =
-        soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(thread->tlsPtr_.opeer);
+        jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
+            ->GetObject(thread->tlsPtr_.opeer);
 
     if (thread_group != nullptr) {
       ArtField* group_name_field =
-          soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+          jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_name);
       ObjPtr<mirror::String> group_name_string =
           group_name_field->GetObject(thread_group)->AsString();
       group_name = (group_name_string != nullptr) ? group_name_string->ToModifiedUtf8() : "<null>";
@@ -1792,10 +1833,10 @@
 
     // this.nativePeer = 0;
     if (Runtime::Current()->IsActiveTransaction()) {
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer)
           ->SetLong<true>(tlsPtr_.opeer, 0);
     } else {
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer)
           ->SetLong<false>(tlsPtr_.opeer, 0);
     }
     Dbg::PostThreadDeath(self);
@@ -1803,7 +1844,7 @@
     // Thread.join() is implemented as an Object.wait() on the Thread.lock object. Signal anyone
     // who is waiting.
     ObjPtr<mirror::Object> lock =
-        soa.DecodeField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
+        jni::DecodeArtField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
     // (This conditional is only needed for tests, where Thread.lock won't have been set.)
     if (lock != nullptr) {
       StackHandleScope<1> hs(self);
@@ -1833,7 +1874,8 @@
     tlsPtr_.jni_env = nullptr;
   }
   CHECK_NE(GetState(), kRunnable);
-  CHECK_NE(ReadFlag(kCheckpointRequest), true);
+  CHECK(!ReadFlag(kCheckpointRequest));
+  CHECK(!ReadFlag(kEmptyCheckpointRequest));
   CHECK(tlsPtr_.checkpoint_function == nullptr);
   CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
@@ -1863,7 +1905,7 @@
   }
   delete tlsPtr_.instrumentation_stack;
   delete tlsPtr_.name;
-  delete tlsPtr_.stack_trace_sample;
+  delete tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample;
   free(tlsPtr_.nested_signal_state);
 
   Runtime::Current()->GetHeap()->AssertThreadLocalBuffersAreRevoked(this);
@@ -1894,7 +1936,7 @@
 void Thread::RemoveFromThreadGroup(ScopedObjectAccess& soa) {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
-  ObjPtr<mirror::Object> ogroup = soa.DecodeField(WellKnownClasses::java_lang_Thread_group)
+  ObjPtr<mirror::Object> ogroup = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
       ->GetObject(tlsPtr_.opeer);
   if (ogroup != nullptr) {
     ScopedLocalRef<jobject> group(soa.Env(), soa.AddLocalReference<jobject>(ogroup));
@@ -2414,7 +2456,7 @@
       ++i;
     }
     ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(exception.Get()));
-    InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(exception_init_method), jv_args);
+    InvokeWithJValues(soa, ref.get(), jni::EncodeArtMethod(exception_init_method), jv_args);
     if (LIKELY(!IsExceptionPending())) {
       SetException(exception.Get());
     }
@@ -2503,7 +2545,7 @@
   QUICK_ENTRY_POINT_INFO(pAllocStringFromChars)
   QUICK_ENTRY_POINT_INFO(pAllocStringFromString)
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial)
-  QUICK_ENTRY_POINT_INFO(pCheckCast)
+  QUICK_ENTRY_POINT_INFO(pCheckInstanceOf)
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage)
   QUICK_ENTRY_POINT_INFO(pInitializeTypeAndVerifyAccess)
   QUICK_ENTRY_POINT_INFO(pInitializeType)
diff --git a/runtime/thread.h b/runtime/thread.h
index 75b5b12..35226f2 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -39,6 +39,7 @@
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
+#include "runtime.h"
 #include "runtime_stats.h"
 #include "stack.h"
 #include "thread_state.h"
@@ -70,7 +71,8 @@
 }  // namespace mirror
 
 namespace verifier {
-class MethodVerifier;
+  class MethodVerifier;
+  class VerifierDeps;
 }  // namespace verifier
 
 class ArtMethod;
@@ -105,7 +107,8 @@
   kSuspendRequest   = 1,  // If set implies that suspend_count_ > 0 and the Thread should enter the
                           // safepoint handler.
   kCheckpointRequest = 2,  // Request that the thread do some checkpoint work and then continue.
-  kActiveSuspendBarrier = 4  // Register that at least 1 suspend barrier needs to be passed.
+  kEmptyCheckpointRequest = 4,  // Request that the thread do empty checkpoint and then continue.
+  kActiveSuspendBarrier = 8,  // Register that at least 1 suspend barrier needs to be passed.
 };
 
 enum class StackedShadowFrameType {
@@ -171,6 +174,9 @@
   // Process pending thread suspension request and handle if pending.
   void CheckSuspend() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Process a pending empty checkpoint if pending.
+  void CheckEmptyCheckpoint() REQUIRES_SHARED(Locks::mutator_lock_);
+
   static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts,
                                    mirror::Object* thread_peer)
       REQUIRES(Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
@@ -239,6 +245,8 @@
       REQUIRES(Locks::thread_suspend_count_lock_);
   void RequestSynchronousCheckpoint(Closure* function)
       REQUIRES(!Locks::thread_suspend_count_lock_, !Locks::thread_list_lock_);
+  bool RequestEmptyCheckpoint()
+      REQUIRES(Locks::thread_suspend_count_lock_);
 
   void SetFlipFunction(Closure* function);
   Closure* GetFlipFunction();
@@ -333,8 +341,7 @@
   }
 
   // Returns the java.lang.Thread's name, or null if this Thread* doesn't have a peer.
-  mirror::String* GetThreadName(const ScopedObjectAccessAlreadyRunnable& ts) const
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::String* GetThreadName() const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
   // allocation, or locking.
@@ -846,10 +853,7 @@
     return tls32_.is_gc_marking;
   }
 
-  void SetIsGcMarking(bool is_marking) {
-    CHECK(kUseReadBarrier);
-    tls32_.is_gc_marking = is_marking;
-  }
+  void SetIsGcMarkingAndUpdateEntrypoints(bool is_marking);
 
   bool GetWeakRefAccessEnabled() const {
     CHECK(kUseReadBarrier);
@@ -945,11 +949,27 @@
   }
 
   std::vector<ArtMethod*>* GetStackTraceSample() const {
-    return tlsPtr_.stack_trace_sample;
+    DCHECK(!Runtime::Current()->IsAotCompiler());
+    return tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample;
   }
 
   void SetStackTraceSample(std::vector<ArtMethod*>* sample) {
-    tlsPtr_.stack_trace_sample = sample;
+    DCHECK(!Runtime::Current()->IsAotCompiler());
+    tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample = sample;
+  }
+
+  verifier::VerifierDeps* GetVerifierDeps() const {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    return tlsPtr_.deps_or_stack_trace_sample.verifier_deps;
+  }
+
+  // It is the responsibility of the caller to make sure the verifier_deps
+  // entry in the thread is cleared before destruction of the actual VerifierDeps
+  // object, or the thread.
+  void SetVerifierDeps(verifier::VerifierDeps* verifier_deps) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    DCHECK(verifier_deps == nullptr || tlsPtr_.deps_or_stack_trace_sample.verifier_deps == nullptr);
+    tlsPtr_.deps_or_stack_trace_sample.verifier_deps = verifier_deps;
   }
 
   uint64_t GetTraceClockBase() const {
@@ -987,7 +1007,7 @@
     tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag);
   }
 
-  void ResetQuickAllocEntryPointsForThread();
+  void ResetQuickAllocEntryPointsForThread(bool is_marking);
 
   // Returns the remaining space in the TLAB.
   size_t TlabSize() const;
@@ -1218,6 +1238,7 @@
       REQUIRES(Locks::thread_suspend_count_lock_);
 
   void RunCheckpointFunction();
+  void RunEmptyCheckpoint();
 
   bool PassActiveSuspendBarriers(Thread* self)
       REQUIRES(!Locks::thread_suspend_count_lock_);
@@ -1375,7 +1396,7 @@
       tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
       managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
       self(nullptr), opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
-      stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
+      deps_or_stack_trace_sample(), wait_next(nullptr), monitor_enter_object(nullptr),
       top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
       instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
@@ -1429,8 +1450,18 @@
     // Size of the stack.
     size_t stack_size;
 
-    // Pointer to previous stack trace captured by sampling profiler.
-    std::vector<ArtMethod*>* stack_trace_sample;
+    // Sampling profiler and AOT verification cannot happen on the same run, so we share
+    // the same entry for the stack trace and the verifier deps.
+    union DepsOrStackTraceSample {
+      DepsOrStackTraceSample() {
+        verifier_deps = nullptr;
+        stack_trace_sample = nullptr;
+      }
+      // Pointer to previous stack trace captured by sampling profiler.
+      std::vector<ArtMethod*>* stack_trace_sample;
+      // When doing AOT verification, per-thread VerifierDeps.
+      verifier::VerifierDeps* verifier_deps;
+    } deps_or_stack_trace_sample;
 
     // The next thread in the wait set this thread is part of or null if not waiting.
     Thread* wait_next;
@@ -1567,7 +1598,8 @@
 
 class SCOPED_CAPABILITY ScopedAssertNoThreadSuspension {
  public:
-  ALWAYS_INLINE ScopedAssertNoThreadSuspension(const char* cause) ACQUIRE(Roles::uninterruptible_) {
+  ALWAYS_INLINE explicit ScopedAssertNoThreadSuspension(const char* cause)
+      ACQUIRE(Roles::uninterruptible_) {
     if (kIsDebugBuild) {
       self_ = Thread::Current();
       old_cause_ = self_->StartAssertNoThreadSuspension(cause);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index eba6666..27fb37a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -32,6 +32,7 @@
 #include "base/timing_logger.h"
 #include "debugger.h"
 #include "gc/collector/concurrent_copying.h"
+#include "gc/reference_processor.h"
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
@@ -68,7 +69,8 @@
       debug_suspend_all_count_(0),
       unregistering_count_(0),
       suspend_all_historam_("suspend all histogram", 16, 64),
-      long_suspend_(false) {
+      long_suspend_(false),
+      empty_checkpoint_barrier_(new Barrier(0)) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
@@ -373,6 +375,43 @@
   return count;
 }
 
+size_t ThreadList::RunEmptyCheckpoint() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
+  Locks::thread_list_lock_->AssertNotHeld(self);
+  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+
+  size_t count = 0;
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : list_) {
+      if (thread != self) {
+        while (true) {
+          if (thread->RequestEmptyCheckpoint()) {
+            // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
+            // some time in the near future.
+            ++count;
+            break;
+          }
+          if (thread->GetState() != kRunnable) {
+            // It's seen suspended, we are done because it must not be in the middle of a mutator
+            // heap access.
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Wake up the threads blocking for weak ref access so that they will respond to the empty
+  // checkpoint request. Otherwise we will hang as they are blocking in the kRunnable state.
+  Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
+  Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint*/true);
+
+  return count;
+}
+
 // Request that a checkpoint function be run on all active (non-suspended)
 // threads.  Returns the number of successful requests.
 size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
@@ -1242,7 +1281,7 @@
     // Initialize according to the state of the CC collector.
     bool is_gc_marking =
         Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking();
-    self->SetIsGcMarking(is_gc_marking);
+    self->SetIsGcMarkingAndUpdateEntrypoints(is_gc_marking);
     bool weak_ref_access_enabled =
         Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled();
     self->SetWeakRefAccessEnabled(weak_ref_access_enabled);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index b455e31..133d430 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_THREAD_LIST_H_
 #define ART_RUNTIME_THREAD_LIST_H_
 
+#include "barrier.h"
 #include "base/histogram.h"
 #include "base/mutex.h"
 #include "base/value_object.h"
@@ -100,6 +101,14 @@
   size_t RunCheckpoint(Closure* checkpoint_function, Closure* callback = nullptr)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
+  // Run an empty checkpoint on threads. Wait until threads pass the next suspend point or are
+  // suspended. This is used to ensure that the threads finish or aren't in the middle of an
+  // in-flight mutator heap access (e.g. a read barrier). Runnable threads will respond by
+  // decrementing the empty checkpoint barrier count. This works even when the weak ref access is
+  // disabled. Only one concurrent use is currently supported.
+  size_t RunEmptyCheckpoint()
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+
   size_t RunCheckpointOnRunnableThreads(Closure* checkpoint_function)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
@@ -158,6 +167,10 @@
   void DumpNativeStacks(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_);
 
+  Barrier* EmptyCheckpointBarrier() {
+    return empty_checkpoint_barrier_.get();
+  }
+
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);
@@ -203,6 +216,8 @@
   // Whether or not the current thread suspension is long.
   bool long_suspend_;
 
+  std::unique_ptr<Barrier> empty_checkpoint_barrier_;
+
   friend class Thread;
 
   DISALLOW_COPY_AND_ASSIGN(ThreadList);
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index b14f340..d9d2ea3 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -85,6 +85,7 @@
   ThreadPoolWorker* worker = reinterpret_cast<ThreadPoolWorker*>(arg);
   Runtime* runtime = Runtime::Current();
   CHECK(runtime->AttachCurrentThread(worker->name_.c_str(), true, nullptr, false));
+  worker->thread_ = Thread::Current();
   // Do work until its time to shut down.
   worker->Run();
   runtime->DetachCurrentThread();
@@ -177,7 +178,7 @@
     }
 
     ++waiting_count_;
-    if (waiting_count_ == GetThreadCount() && tasks_.empty()) {
+    if (waiting_count_ == GetThreadCount() && !HasOutstandingTasks()) {
       // We may be done, lets broadcast to the completion condition.
       completion_condition_.Broadcast(self);
     }
@@ -200,7 +201,7 @@
 }
 
 Task* ThreadPool::TryGetTaskLocked() {
-  if (started_ && !tasks_.empty()) {
+  if (HasOutstandingTasks()) {
     Task* task = tasks_.front();
     tasks_.pop_front();
     return task;
@@ -218,7 +219,7 @@
   }
   // Wait until each thread is waiting and the task list is empty.
   MutexLock mu(self, task_queue_lock_);
-  while (!shutting_down_ && (waiting_count_ != GetThreadCount() || !tasks_.empty())) {
+  while (!shutting_down_ && (waiting_count_ != GetThreadCount() || HasOutstandingTasks())) {
     if (!may_hold_locks) {
       completion_condition_.Wait(self);
     } else {
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index b6c6f02..eaadfe0 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -62,6 +62,8 @@
   // Set the "nice" priorty for this worker.
   void SetPthreadPriority(int priority);
 
+  Thread* GetThread() const { return thread_; }
+
  protected:
   ThreadPoolWorker(ThreadPool* thread_pool, const std::string& name, size_t stack_size);
   static void* Callback(void* arg) REQUIRES(!Locks::mutator_lock_);
@@ -71,6 +73,7 @@
   const std::string name_;
   std::unique_ptr<MemMap> stack_;
   pthread_t pthread_;
+  Thread* thread_;
 
  private:
   friend class ThreadPool;
@@ -84,6 +87,10 @@
     return threads_.size();
   }
 
+  const std::vector<ThreadPoolWorker*>& GetWorkers() const {
+    return threads_;
+  }
+
   // Broadcast to the workers and tell them to empty out the work queue.
   void StartWorkers(Thread* self) REQUIRES(!task_queue_lock_);
 
@@ -100,7 +107,8 @@
   ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
-  // Wait for all tasks currently on queue to get completed.
+  // Wait for all tasks currently on queue to get completed. If the pool has been stopped, only
+  // wait till all already running tasks are done.
   void Wait(Thread* self, bool do_work, bool may_hold_locks) REQUIRES(!task_queue_lock_);
 
   size_t GetTaskCount(Thread* self) REQUIRES(!task_queue_lock_);
@@ -130,6 +138,10 @@
     return shutting_down_;
   }
 
+  bool HasOutstandingTasks() const REQUIRES(task_queue_lock_) {
+    return started_ && !tasks_.empty();
+  }
+
   const std::string name_;
   Mutex task_queue_lock_;
   ConditionVariable task_queue_condition_ GUARDED_BY(task_queue_lock_);
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index d5f17d1..14c2c3b 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -98,6 +98,29 @@
   thread_pool.Wait(self, false, false);
 }
 
+TEST_F(ThreadPoolTest, StopWait) {
+  Thread* self = Thread::Current();
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
+
+  AtomicInteger count(0);
+  static const int32_t num_tasks = num_threads * 100;
+  for (int32_t i = 0; i < num_tasks; ++i) {
+    thread_pool.AddTask(self, new CountTask(&count));
+  }
+
+  // Signal the threads to start processing tasks.
+  thread_pool.StartWorkers(self);
+  usleep(200);
+  thread_pool.StopWorkers(self);
+
+  thread_pool.Wait(self, false, false);  // We should not deadlock here.
+
+  // Drain the task list. Note: we have to restart here, as no tasks will be finished when
+  // the pool is stopped.
+  thread_pool.StartWorkers(self);
+  thread_pool.Wait(self, /* do_work */ true, false);
+}
+
 class TreeTask : public Task {
  public:
   TreeTask(ThreadPool* const thread_pool, AtomicInteger* count, int depth)
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 9f8d981..2536968 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -167,29 +167,30 @@
   array_log.LogValue(index, value);
 }
 
-void Transaction::RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx) {
+void Transaction::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache,
+                                      dex::StringIndex string_idx) {
   DCHECK(dex_cache != nullptr);
-  DCHECK_LT(string_idx, dex_cache->GetDexFile()->NumStringIds());
+  DCHECK_LT(string_idx.index_, dex_cache->GetDexFile()->NumStringIds());
   MutexLock mu(Thread::Current(), log_lock_);
   resolve_string_logs_.push_back(ResolveStringLog(dex_cache, string_idx));
 }
 
-void Transaction::RecordStrongStringInsertion(mirror::String* s) {
+void Transaction::RecordStrongStringInsertion(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kStrongString, InternStringLog::kInsert);
   LogInternedString(log);
 }
 
-void Transaction::RecordWeakStringInsertion(mirror::String* s) {
+void Transaction::RecordWeakStringInsertion(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kWeakString, InternStringLog::kInsert);
   LogInternedString(log);
 }
 
-void Transaction::RecordStrongStringRemoval(mirror::String* s) {
+void Transaction::RecordStrongStringRemoval(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kStrongString, InternStringLog::kRemove);
   LogInternedString(log);
 }
 
-void Transaction::RecordWeakStringRemoval(mirror::String* s) {
+void Transaction::RecordWeakStringRemoval(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kWeakString, InternStringLog::kRemove);
   LogInternedString(log);
 }
@@ -470,10 +471,10 @@
     case InternStringLog::kInsert: {
       switch (string_kind_) {
         case InternStringLog::kStrongString:
-          intern_table->RemoveStrongFromTransaction(str_);
+          intern_table->RemoveStrongFromTransaction(str_.Read());
           break;
         case InternStringLog::kWeakString:
-          intern_table->RemoveWeakFromTransaction(str_);
+          intern_table->RemoveWeakFromTransaction(str_.Read());
           break;
         default:
           LOG(FATAL) << "Unknown interned string kind";
@@ -484,10 +485,10 @@
     case InternStringLog::kRemove: {
       switch (string_kind_) {
         case InternStringLog::kStrongString:
-          intern_table->InsertStrongFromTransaction(str_);
+          intern_table->InsertStrongFromTransaction(str_.Read());
           break;
         case InternStringLog::kWeakString:
-          intern_table->InsertWeakFromTransaction(str_);
+          intern_table->InsertWeakFromTransaction(str_.Read());
           break;
         default:
           LOG(FATAL) << "Unknown interned string kind";
@@ -502,24 +503,34 @@
 }
 
 void Transaction::InternStringLog::VisitRoots(RootVisitor* visitor) {
-  visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&str_), RootInfo(kRootInternedString));
+  str_.VisitRoot(visitor, RootInfo(kRootInternedString));
 }
 
 void Transaction::ResolveStringLog::Undo() {
   dex_cache_.Read()->ClearString(string_idx_);
 }
 
-Transaction::ResolveStringLog::ResolveStringLog(mirror::DexCache* dex_cache, uint32_t string_idx)
+Transaction::ResolveStringLog::ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache,
+                                                dex::StringIndex string_idx)
     : dex_cache_(dex_cache),
       string_idx_(string_idx) {
   DCHECK(dex_cache != nullptr);
-  DCHECK_LT(string_idx_, dex_cache->GetDexFile()->NumStringIds());
+  DCHECK_LT(string_idx_.index_, dex_cache->GetDexFile()->NumStringIds());
 }
 
 void Transaction::ResolveStringLog::VisitRoots(RootVisitor* visitor) {
   dex_cache_.VisitRoot(visitor, RootInfo(kRootVMInternal));
 }
 
+Transaction::InternStringLog::InternStringLog(ObjPtr<mirror::String> s,
+                                              StringKind kind,
+                                              StringOp op)
+    : str_(s),
+      string_kind_(kind),
+      string_op_(op) {
+  DCHECK(s != nullptr);
+}
+
 void Transaction::ArrayLog::LogValue(size_t index, uint64_t value) {
   auto it = array_values_.find(index);
   if (it == array_values_.end()) {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 584dfb8..1774657 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -20,6 +20,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/value_object.h"
+#include "dex_file_types.h"
 #include "gc_root.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -83,21 +84,21 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Record intern string table changes.
-  void RecordStrongStringInsertion(mirror::String* s)
+  void RecordStrongStringInsertion(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
-  void RecordWeakStringInsertion(mirror::String* s)
+  void RecordWeakStringInsertion(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
-  void RecordStrongStringRemoval(mirror::String* s)
+  void RecordStrongStringRemoval(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
-  void RecordWeakStringRemoval(mirror::String* s)
+  void RecordWeakStringRemoval(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
 
   // Record resolve string.
-  void RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx)
+  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, dex::StringIndex string_idx)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!log_lock_);
 
@@ -182,10 +183,7 @@
       kInsert,
       kRemove
     };
-    InternStringLog(mirror::String* s, StringKind kind, StringOp op)
-      : str_(s), string_kind_(kind), string_op_(op) {
-      DCHECK(s != nullptr);
-    }
+    InternStringLog(ObjPtr<mirror::String> s, StringKind kind, StringOp op);
 
     void Undo(InternTable* intern_table)
         REQUIRES_SHARED(Locks::mutator_lock_)
@@ -193,14 +191,14 @@
     void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
    private:
-    mirror::String* str_;
+    GcRoot<mirror::String> str_;
     const StringKind string_kind_;
     const StringOp string_op_;
   };
 
   class ResolveStringLog : public ValueObject {
    public:
-    ResolveStringLog(mirror::DexCache* dex_cache, uint32_t string_idx);
+    ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache, dex::StringIndex string_idx);
 
     void Undo() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -208,7 +206,7 @@
 
    private:
     GcRoot<mirror::DexCache> dex_cache_;
-    const uint32_t string_idx_;
+    const dex::StringIndex string_idx_;
   };
 
   void LogInternedString(const InternStringLog& log)
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 77c2b76..a43c967 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -26,8 +26,6 @@
 
 namespace art {
 
-static const size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
-
 class TransactionTest : public CommonRuntimeTest {
  public:
   // Tests failing class initialization due to native call with transaction rollback.
@@ -507,8 +505,8 @@
   static const char* kResolvedString = "ResolvedString";
   const DexFile::StringId* string_id = dex_file->FindStringId(kResolvedString);
   ASSERT_TRUE(string_id != nullptr);
-  uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
-  ASSERT_NE(string_idx, kDexNoIndex);
+  dex::StringIndex string_idx = dex_file->GetIndexForStringId(*string_id);
+  ASSERT_TRUE(string_idx.IsValid());
   // String should only get resolved by the initializer.
   EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache) == nullptr);
   EXPECT_TRUE(h_dex_cache->GetResolvedString(string_idx) == nullptr);
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
index 56e9262..16cd722 100644
--- a/runtime/type_lookup_table.cc
+++ b/runtime/type_lookup_table.cc
@@ -50,17 +50,19 @@
   return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max();
 }
 
-TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file, uint8_t* storage) {
+std::unique_ptr<TypeLookupTable> TypeLookupTable::Create(const DexFile& dex_file,
+                                                         uint8_t* storage) {
   const uint32_t num_class_defs = dex_file.NumClassDefs();
-  return SupportedSize(num_class_defs)
+  return std::unique_ptr<TypeLookupTable>(SupportedSize(num_class_defs)
       ? new TypeLookupTable(dex_file, storage)
-      : nullptr;
+      : nullptr);
 }
 
-TypeLookupTable* TypeLookupTable::Open(const uint8_t* dex_file_pointer,
-                                       const uint8_t* raw_data,
-                                       uint32_t num_class_defs) {
-  return new TypeLookupTable(dex_file_pointer, raw_data, num_class_defs);
+std::unique_ptr<TypeLookupTable> TypeLookupTable::Open(const uint8_t* dex_file_pointer,
+                                                       const uint8_t* raw_data,
+                                                       uint32_t num_class_defs) {
+  return std::unique_ptr<TypeLookupTable>(
+      new TypeLookupTable(dex_file_pointer, raw_data, num_class_defs));
 }
 
 TypeLookupTable::TypeLookupTable(const DexFile& dex_file, uint8_t* storage)
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 9595743..3f6f76f 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -60,13 +60,14 @@
   }
 
   // Method creates lookup table for dex file
-  static TypeLookupTable* Create(const DexFile& dex_file, uint8_t* storage = nullptr);
+  static std::unique_ptr<TypeLookupTable> Create(const DexFile& dex_file,
+                                                 uint8_t* storage = nullptr);
 
   // Method opens lookup table from binary data. Lookups will traverse strings and other
   // data contained in dex_file as well.  Lookup table does not own raw_data or dex_file.
-  static TypeLookupTable* Open(const uint8_t* dex_file_pointer,
-                               const uint8_t* raw_data,
-                               uint32_t num_class_defs);
+  static std::unique_ptr<TypeLookupTable> Open(const uint8_t* dex_file_pointer,
+                                               const uint8_t* raw_data,
+                                               uint32_t num_class_defs);
 
   // Method returns pointer to binary data of lookup table. Used by the oat writer.
   const uint8_t* RawData() const {
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 3284925..d1e9751 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -113,8 +113,8 @@
   EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding)));
 }
 
-static void AssertConversion(const std::vector<uint16_t> input,
-                             const std::vector<uint8_t> expected) {
+static void AssertConversion(const std::vector<uint16_t>& input,
+                             const std::vector<uint8_t>& expected) {
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
diff --git a/runtime/utils.h b/runtime/utils.h
index 21df839..1e98057 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -279,24 +279,42 @@
 using UsageFn = void (*)(const char*, ...);
 
 template <typename T>
-static void ParseUintOption(const StringPiece& option,
+static void ParseIntOption(const StringPiece& option,
                             const std::string& option_name,
                             T* out,
-                            UsageFn Usage,
+                            UsageFn usage,
                             bool is_long_option = true) {
   std::string option_prefix = option_name + (is_long_option ? "=" : "");
   DCHECK(option.starts_with(option_prefix)) << option << " " << option_prefix;
   const char* value_string = option.substr(option_prefix.size()).data();
   int64_t parsed_integer_value = 0;
   if (!ParseInt(value_string, &parsed_integer_value)) {
-    Usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
-  }
-  if (parsed_integer_value < 0) {
-    Usage("%s passed a negative value %d", option_name.c_str(), parsed_integer_value);
+    usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
   }
   *out = dchecked_integral_cast<T>(parsed_integer_value);
 }
 
+// Parses the integer value of `option` into `*out`, reporting negative values through `usage`.
+// The sign is checked on the signed 64-bit parse result before narrowing to T, so the check
+// is also effective when T is an unsigned type.
+template <typename T>
+static void ParseUintOption(const StringPiece& option,
+                            const std::string& option_name,
+                            T* out,
+                            UsageFn usage,
+                            bool is_long_option = true) {
+  int64_t parsed_integer_value = 0;
+  ParseIntOption(option, option_name, &parsed_integer_value, usage, is_long_option);
+  if (parsed_integer_value < 0) {
+    usage("%s passed a negative value %lld",
+          option_name.c_str(),
+          static_cast<long long>(parsed_integer_value));  // NOLINT(runtime/int)
+    // Only reached if `usage` returns; clamp so the narrowing cast below stays in range.
+    parsed_integer_value = 0;
+  }
+  *out = dchecked_integral_cast<T>(parsed_integer_value);
+}
+
 void ParseDouble(const std::string& option,
                  char after_char,
                  double min,
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index c7875b5..bd1b044 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -65,8 +65,8 @@
   return PointerSize::k32;
 }
 
-inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const {
-  return types_offset_ + ElementOffset(GcRootAsPointerSize<mirror::Class>(), type_idx);
+inline size_t DexCacheArraysLayout::TypeOffset(dex::TypeIndex type_idx) const {
+  return types_offset_ + ElementOffset(GcRootAsPointerSize<mirror::Class>(), type_idx.index_);
 }
 
 inline size_t DexCacheArraysLayout::TypesSize(size_t num_elements) const {
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index ae3bfab..7d4b23a 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_
 
 #include "dex_file.h"
+#include "dex_file_types.h"
 
 namespace art {
 
@@ -59,7 +60,7 @@
     return types_offset_;
   }
 
-  size_t TypeOffset(uint32_t type_idx) const;
+  size_t TypeOffset(dex::TypeIndex type_idx) const;
 
   size_t TypesSize(size_t num_elements) const;
 
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index b3dab58..843be92 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -20,6 +20,7 @@
 
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
+#include "dex_file.h"
 
 namespace art {
 
@@ -73,10 +74,19 @@
     return nullptr;
   }
 
+  return Open(vdex_file->Fd(), vdex_length, vdex_filename, writable, low_4gb, error_msg);
+}
+
+VdexFile* VdexFile::Open(int file_fd,
+                         size_t vdex_length,
+                         const std::string& vdex_filename,
+                         bool writable,
+                         bool low_4gb,
+                         std::string* error_msg) {
   std::unique_ptr<MemMap> mmap(MemMap::MapFile(vdex_length,
                                                writable ? PROT_READ | PROT_WRITE : PROT_READ,
                                                MAP_SHARED,
-                                               vdex_file->Fd(),
+                                               file_fd,
                                                0 /* start offset */,
                                                low_4gb,
                                                vdex_filename.c_str(),
@@ -90,4 +100,16 @@
   return new VdexFile(mmap.release());
 }
 
+const uint8_t* VdexFile::GetNextDexFileData(const uint8_t* cursor) const {
+  DCHECK(cursor == nullptr || (cursor >= DexBegin() && cursor < DexEnd()));
+  if (cursor == nullptr) {
+    // Beginning of the iteration, return the first dex file if there is one.
+    return HasDexSection() ? DexBegin() : nullptr;
+  } else {
+    // Fetch the next dex file. Return null once the dex section end is reached or overshot.
+    const uint8_t* data = cursor + reinterpret_cast<const DexFile::Header*>(cursor)->file_size_;
+    return (data >= DexEnd()) ? nullptr : data;
+  }
+}
+
 }  // namespace art
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 28f9bb3..75a0d5e 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 #include <string>
 
+#include "base/array_ref.h"
 #include "base/macros.h"
 #include "mem_map.h"
 #include "os.h"
@@ -44,8 +45,11 @@
    public:
     Header(uint32_t dex_size, uint32_t verifier_deps_size, uint32_t quickening_info_size);
 
+    const char* GetMagic() const { return reinterpret_cast<const char*>(magic_); }
+    const char* GetVersion() const { return reinterpret_cast<const char*>(version_); }
     bool IsMagicValid() const;
     bool IsVersionValid() const;
+    bool IsValid() const { return IsMagicValid() && IsVersionValid(); }
 
     uint32_t GetDexSize() const { return dex_size_; }
     uint32_t GetVerifierDepsSize() const { return verifier_deps_size_; }
@@ -67,13 +71,57 @@
                         bool low_4gb,
                         std::string* error_msg);
 
+  static VdexFile* Open(int file_fd,
+                        size_t vdex_length,
+                        const std::string& vdex_filename,
+                        bool writable,
+                        bool low_4gb,
+                        std::string* error_msg);
+
   const uint8_t* Begin() const { return mmap_->Begin(); }
   const uint8_t* End() const { return mmap_->End(); }
   size_t Size() const { return mmap_->Size(); }
 
+  const Header& GetHeader() const {
+    return *reinterpret_cast<const Header*>(Begin());
+  }
+
+  ArrayRef<const uint8_t> GetVerifierDepsData() const {
+    return ArrayRef<const uint8_t>(
+        Begin() + sizeof(Header) + GetHeader().GetDexSize(), GetHeader().GetVerifierDepsSize());
+  }
+
+  ArrayRef<const uint8_t> GetQuickeningInfo() const {
+    return ArrayRef<const uint8_t>(
+        GetVerifierDepsData().data() + GetHeader().GetVerifierDepsSize(),
+        GetHeader().GetQuickeningInfoSize());
+  }
+
+  bool IsValid() const {
+    return mmap_->Size() >= sizeof(Header) && GetHeader().IsValid();
+  }
+
+  // This method is for iterating over the dex files in the vdex. If `cursor` is null,
+  // the first dex file is returned. If `cursor` is not null, it must point to a dex
+  // file and this method returns the next dex file if there is one, or null if there
+  // is none.
+  const uint8_t* GetNextDexFileData(const uint8_t* cursor) const;
+
  private:
   explicit VdexFile(MemMap* mmap) : mmap_(mmap) {}
 
+  bool HasDexSection() const {
+    return GetHeader().GetDexSize() != 0;
+  }
+
+  const uint8_t* DexBegin() const {
+    return Begin() + sizeof(Header);
+  }
+
+  const uint8_t* DexEnd() const {
+    return Begin() + sizeof(Header) + GetHeader().GetDexSize();
+  }
+
   std::unique_ptr<MemMap> mmap_;
 
   DISALLOW_COPY_AND_ASSIGN(VdexFile);
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index def61db..363bd8f 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -74,7 +74,7 @@
   return !failure_messages_.empty();
 }
 
-inline const RegType& MethodVerifier::ResolveCheckedClass(uint32_t class_idx) {
+inline const RegType& MethodVerifier::ResolveCheckedClass(dex::TypeIndex class_idx) {
   DCHECK(!HasFailures());
   const RegType& result = ResolveClassAndCheckAccess(class_idx);
   DCHECK(!HasFailures());
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 97bc79c..ebecc85 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -41,6 +41,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
+#include "mirror/method_handle_impl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "reg_type-inl.h"
@@ -100,8 +101,18 @@
 ALWAYS_INLINE static inline bool FailOrAbort(MethodVerifier* verifier, bool condition,
                                              const char* error_msg, uint32_t work_insn_idx) {
   if (kIsDebugBuild) {
-    // In a debug build, abort if the error condition is wrong.
-    DCHECK(condition) << error_msg << work_insn_idx;
+    // In a debug build, abort if the error condition is wrong. Only warn if
+    // we are already aborting (as this verification is likely run to print
+    // lock information).
+    if (LIKELY(gAborting == 0)) {
+      DCHECK(condition) << error_msg << work_insn_idx;
+    } else {
+      if (!condition) {
+        LOG(ERROR) << error_msg << work_insn_idx;
+        verifier->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << error_msg << work_insn_idx;
+        return true;
+      }
+    }
   } else {
     // In a non-debug build, just fail the class.
     if (!condition) {
@@ -400,15 +411,15 @@
       result.kind = kSoftFailure;
       if (method != nullptr &&
           !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+        method->AddAccessFlags(kAccCompileDontBother);
       }
     }
     if (method != nullptr) {
       if (verifier.HasInstructionThatWillThrow()) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+        method->AddAccessFlags(kAccCompileDontBother);
       }
       if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccMustCountLocks);
+        method->AddAccessFlags(kAccMustCountLocks);
       }
     }
   } else {
@@ -956,7 +967,7 @@
   delete last_fail_message;
 }
 
-void MethodVerifier::AppendToLastFailMessage(std::string append) {
+void MethodVerifier::AppendToLastFailMessage(const std::string& append) {
   size_t failure_num = failure_messages_.size();
   DCHECK_NE(failure_num, 0U);
   std::ostringstream* last_fail_message = failure_messages_[failure_num - 1];
@@ -1062,7 +1073,7 @@
       GetInstructionFlags(dex_pc).SetBranchTarget();
       // Ensure exception types are resolved so that they don't need resolution to be delivered,
       // unresolved exception types will be ignored by exception delivery
-      if (iterator.GetHandlerTypeIndex() != DexFile::kDexNoIndex16) {
+      if (iterator.GetHandlerTypeIndex().IsValid()) {
         mirror::Class* exception_type = linker->ResolveType(*dex_file_,
                                                             iterator.GetHandlerTypeIndex(),
                                                             dex_cache_, class_loader_);
@@ -1145,13 +1156,13 @@
       result = result && CheckMethodIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBNewInstance:
-      result = result && CheckNewInstance(inst->VRegB());
+      result = result && CheckNewInstance(dex::TypeIndex(inst->VRegB()));
       break;
     case Instruction::kVerifyRegBString:
       result = result && CheckStringIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBType:
-      result = result && CheckTypeIndex(inst->VRegB());
+      result = result && CheckTypeIndex(dex::TypeIndex(inst->VRegB()));
       break;
     case Instruction::kVerifyRegBWide:
       result = result && CheckWideRegisterIndex(inst->VRegB());
@@ -1165,15 +1176,20 @@
       result = result && CheckFieldIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCNewArray:
-      result = result && CheckNewArray(inst->VRegC());
+      result = result && CheckNewArray(dex::TypeIndex(inst->VRegC()));
       break;
     case Instruction::kVerifyRegCType:
-      result = result && CheckTypeIndex(inst->VRegC());
+      result = result && CheckTypeIndex(dex::TypeIndex(inst->VRegC()));
       break;
     case Instruction::kVerifyRegCWide:
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
   }
+  switch (inst->GetVerifyTypeArgumentH()) {
+    case Instruction::kVerifyRegHPrototype:
+      result = result && CheckPrototypeIndex(inst->VRegH());
+      break;
+  }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
       result = result && CheckArrayData(code_offset);
@@ -1260,9 +1276,9 @@
   return true;
 }
 
-inline bool MethodVerifier::CheckNewInstance(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().type_ids_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx << " (max "
+inline bool MethodVerifier::CheckNewInstance(dex::TypeIndex idx) {
+  if (idx.index_ >= dex_file_->GetHeader().type_ids_size_) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
                                       << dex_file_->GetHeader().type_ids_size_ << ")";
     return false;
   }
@@ -1279,6 +1295,15 @@
   return true;
 }
 
+inline bool MethodVerifier::CheckPrototypeIndex(uint32_t idx) {
+  const auto limit = dex_file_->GetHeader().proto_ids_size_;  // Bound read from the dex header.
+  if (idx < limit) {
+    return true;
+  }
+  Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad prototype index " << idx << " (max " << limit << ")";
+  return false;
+}
+
 inline bool MethodVerifier::CheckStringIndex(uint32_t idx) {
   if (idx >= dex_file_->GetHeader().string_ids_size_) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad string index " << idx << " (max "
@@ -1288,18 +1313,18 @@
   return true;
 }
 
-inline bool MethodVerifier::CheckTypeIndex(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().type_ids_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx << " (max "
+inline bool MethodVerifier::CheckTypeIndex(dex::TypeIndex idx) {
+  if (idx.index_ >= dex_file_->GetHeader().type_ids_size_) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
                                       << dex_file_->GetHeader().type_ids_size_ << ")";
     return false;
   }
   return true;
 }
 
-bool MethodVerifier::CheckNewArray(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().type_ids_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx << " (max "
+bool MethodVerifier::CheckNewArray(dex::TypeIndex idx) {
+  if (idx.index_ >= dex_file_->GetHeader().type_ids_size_) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
                                       << dex_file_->GetHeader().type_ids_size_ << ")";
     return false;
   }
@@ -1935,7 +1960,7 @@
 
 // Returns the index of the first final instance field of the given class, or kDexNoIndex if there
 // is no such field.
-static uint32_t GetFirstFinalInstanceFieldIndex(const DexFile& dex_file, uint16_t type_idx) {
+static uint32_t GetFirstFinalInstanceFieldIndex(const DexFile& dex_file, dex::TypeIndex type_idx) {
   const DexFile::ClassDef* class_def = dex_file.FindClassDef(type_idx);
   DCHECK(class_def != nullptr);
   const uint8_t* class_data = dex_file.GetClassData(*class_def);
@@ -2283,7 +2308,7 @@
     case Instruction::CONST_CLASS: {
       // Get type from instruction if unresolved then we need an access check
       // TODO: check Compiler::CanAccessTypeWithoutChecks returns false when res_type is unresolved
-      const RegType& res_type = ResolveClassAndCheckAccess(inst->VRegB_21c());
+      const RegType& res_type = ResolveClassAndCheckAccess(dex::TypeIndex(inst->VRegB_21c()));
       // Register holds class, ie its type is class, on error it will hold Conflict.
       work_line_->SetRegisterType<LockOp::kClear>(
           this, inst->VRegA_21c(), res_type.IsConflict() ? res_type
@@ -2353,7 +2378,7 @@
        * dec_insn.vA when branching to a handler.
        */
       const bool is_checkcast = (inst->Opcode() == Instruction::CHECK_CAST);
-      const uint32_t type_idx = (is_checkcast) ? inst->VRegB_21c() : inst->VRegC_22c();
+      const dex::TypeIndex type_idx((is_checkcast) ? inst->VRegB_21c() : inst->VRegC_22c());
       const RegType& res_type = ResolveClassAndCheckAccess(type_idx);
       if (res_type.IsConflict()) {
         // If this is a primitive type, fail HARD.
@@ -2423,7 +2448,7 @@
       break;
     }
     case Instruction::NEW_INSTANCE: {
-      const RegType& res_type = ResolveClassAndCheckAccess(inst->VRegB_21c());
+      const RegType& res_type = ResolveClassAndCheckAccess(dex::TypeIndex(inst->VRegB_21c()));
       if (res_type.IsConflict()) {
         DCHECK_NE(failures_.size(), 0U);
         break;  // bad class
@@ -2635,7 +2660,8 @@
         // ensure that subsequent merges don't lose type information - such as becoming an
         // interface from a class that would lose information relevant to field checks.
         const RegType& orig_type = work_line_->GetRegisterType(this, instance_of_inst->VRegB_22c());
-        const RegType& cast_type = ResolveClassAndCheckAccess(instance_of_inst->VRegC_22c());
+        const RegType& cast_type = ResolveClassAndCheckAccess(
+            dex::TypeIndex(instance_of_inst->VRegC_22c()));
 
         if (!orig_type.Equals(cast_type) &&
             !cast_type.IsUnresolvedTypes() && !orig_type.IsUnresolvedTypes() &&
@@ -2873,7 +2899,8 @@
       if (return_type == nullptr) {
         uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-        uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
+        dex::TypeIndex return_type_idx =
+            dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
         return_type = &reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
       }
@@ -2896,7 +2923,8 @@
         uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
         is_constructor = strcmp("<init>", dex_file_->StringDataByIdx(method_id.name_idx_)) == 0;
-        uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
+        dex::TypeIndex return_type_idx =
+            dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         return_type_descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
       } else {
         is_constructor = called_method->IsConstructor();
@@ -2921,7 +2949,7 @@
          * allowing the latter only if the "this" argument is the same as the "this" argument to
          * this method (which implies that we're in a constructor ourselves).
          */
-        const RegType& this_type = work_line_->GetInvocationThis(this, inst, is_range);
+        const RegType& this_type = work_line_->GetInvocationThis(this, inst);
         if (this_type.IsConflict())  // failure.
           break;
 
@@ -2972,7 +3000,8 @@
         if (called_method == nullptr) {
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
           const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-          uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
+          dex::TypeIndex return_type_idx =
+              dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
           descriptor = dex_file_->StringByTypeIdx(return_type_idx);
         } else {
           descriptor = called_method->GetReturnTypeDescriptor();
@@ -3001,7 +3030,7 @@
       /* Get the type of the "this" arg, which should either be a sub-interface of called
        * interface or Object (see comments in RegType::JoinClass).
        */
-      const RegType& this_type = work_line_->GetInvocationThis(this, inst, is_range);
+      const RegType& this_type = work_line_->GetInvocationThis(this, inst);
       if (this_type.IsZero()) {
         /* null pointer always passes (and always fails at runtime) */
       } else {
@@ -3026,7 +3055,8 @@
       if (abs_method == nullptr) {
         uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-        uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
+        dex::TypeIndex return_type_idx =
+            dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         descriptor = dex_file_->StringByTypeIdx(return_type_idx);
       } else {
         descriptor = abs_method->GetReturnTypeDescriptor();
@@ -3042,10 +3072,37 @@
     }
     case Instruction::INVOKE_POLYMORPHIC:
     case Instruction::INVOKE_POLYMORPHIC_RANGE: {
+      bool is_range = (inst->Opcode() == Instruction::INVOKE_POLYMORPHIC_RANGE);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_POLYMORPHIC, is_range);
+      if (called_method == nullptr) {
+        // Convert potential soft failures in VerifyInvocationArgs() to hard errors.
+        if (failure_messages_.size() > 0) {
+          std::string message = failure_messages_.back()->str();
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << message;
+        } else {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-polymorphic verification failure.";
+        }
+        break;
+      }
+      if (!CheckSignaturePolymorphicMethod(called_method) ||
+          !CheckSignaturePolymorphicReceiver(inst)) {
+        break;
+      }
+      const uint32_t proto_idx = (is_range) ? inst->VRegH_4rcc() : inst->VRegH_45cc();
+      const char* descriptor =
+          dex_file_->GetReturnTypeDescriptor(dex_file_->GetProtoId(proto_idx));
+      const RegType& return_type =
+          reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
+      if (!return_type.IsLowHalf()) {
+        work_line_->SetResultRegisterType(this, return_type);
+      } else {
+        work_line_->SetResultRegisterTypeWide(return_type, return_type.HighHalf(&reg_types_));
+      }
+      // TODO(oth): remove when compiler support is available.
       Fail(VERIFY_ERROR_FORCE_INTERPRETER)
-          << "instruction is not supported by verifier; skipping verification";
+          << "invoke-polymorphic is not supported by compiler";
       have_pending_experimental_failure_ = true;
-      return false;
+      break;
     }
     case Instruction::NEG_INT:
     case Instruction::NOT_INT:
@@ -3401,8 +3458,6 @@
     work_line_->SetResultTypeToUnknown(this);
   }
 
-
-
   /*
    * Handle "branch". Tag the branch target.
    *
@@ -3490,8 +3545,8 @@
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
 
     for (; iterator.HasNext(); iterator.Next()) {
-      uint16_t handler_type_idx = iterator.GetHandlerTypeIndex();
-      if (handler_type_idx == DexFile::kDexNoIndex16) {
+      dex::TypeIndex handler_type_idx = iterator.GetHandlerTypeIndex();
+      if (!handler_type_idx.IsValid()) {
         has_catch_all_handler = true;
       } else {
         // It is also a catch-all if it is java.lang.Throwable.
@@ -3618,7 +3673,7 @@
   return klass->IsInstantiable() || klass->IsPrimitive();
 }
 
-const RegType& MethodVerifier::ResolveClassAndCheckAccess(uint32_t class_idx) {
+const RegType& MethodVerifier::ResolveClassAndCheckAccess(dex::TypeIndex class_idx) {
   mirror::Class* klass = dex_cache_->GetResolvedType(class_idx);
   const RegType* result = nullptr;
   if (klass != nullptr) {
@@ -3674,7 +3729,7 @@
       CatchHandlerIterator iterator(handlers_ptr);
       for (; iterator.HasNext(); iterator.Next()) {
         if (iterator.GetHandlerAddress() == (uint32_t) work_insn_idx_) {
-          if (iterator.GetHandlerTypeIndex() == DexFile::kDexNoIndex16) {
+          if (!iterator.GetHandlerTypeIndex().IsValid()) {
             common_super = &reg_types_.JavaLangThrowable(false);
           } else {
             const RegType& exception = ResolveClassAndCheckAccess(iterator.GetHandlerTypeIndex());
@@ -3725,7 +3780,8 @@
   } else if (method_type == METHOD_SUPER && is_interface) {
     return kInterfaceMethodResolution;
   } else {
-    DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
+    DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER
+           || method_type == METHOD_POLYMORPHIC);
     return kVirtualMethodResolution;
   }
 }
@@ -3853,15 +3909,18 @@
     return nullptr;
   }
   // See if the method type implied by the invoke instruction matches the access flags for the
-  // target method.
+  // target method. The flags for METHOD_POLYMORPHIC are based on there being precisely two
+  // signature polymorphic methods supported by the run-time which are native methods with variable
+  // arguments.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
       ((method_type == METHOD_SUPER ||
         method_type == METHOD_VIRTUAL ||
-        method_type == METHOD_INTERFACE) && res_method->IsDirect())
-      ) {
+        method_type == METHOD_INTERFACE) && res_method->IsDirect()) ||
+      ((method_type == METHOD_POLYMORPHIC) &&
+       (!res_method->IsNative() || !res_method->IsVarargs()))) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
-                                       " type of " << res_method->PrettyMethod();
+                                       "type of " << res_method->PrettyMethod();
     return nullptr;
   }
   return res_method;
@@ -3873,20 +3932,18 @@
   // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
   // match the call to the signature. Also, we might be calling through an abstract method
   // definition (which doesn't have register count values).
-  const size_t expected_args = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
+  const size_t expected_args = inst->VRegA();
   /* caught by static verifier */
   DCHECK(is_range || expected_args <= 5);
-  if (expected_args > code_item_->outs_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
-        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
-    return nullptr;
-  }
 
-  uint32_t arg[5];
-  if (!is_range) {
-    inst->GetVarArgs(arg);
+  // TODO(oth): Enable this path for invoke-polymorphic when b/33099829 is resolved.
+  if (method_type != METHOD_POLYMORPHIC) {
+    if (expected_args > code_item_->outs_size_) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
+                                        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
+      return nullptr;
+    }
   }
-  uint32_t sig_registers = 0;
 
   /*
    * Check the "this" argument, which must be an instance of the class that declared the method.
@@ -3894,7 +3951,7 @@
    * rigorous check here (which is okay since we have to do it at runtime).
    */
   if (method_type != METHOD_STATIC) {
-    const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst, is_range);
+    const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst);
     if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
       CHECK(have_pending_hard_failure_);
       return nullptr;
@@ -3930,8 +3987,8 @@
         res_method_class = &FromClass(klass->GetDescriptor(&temp), klass,
                                       klass->CannotBeAssignedFromOtherTypes());
       } else {
-        const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-        const uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
+        const uint32_t method_idx = inst->VRegB();
+        const dex::TypeIndex class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
         res_method_class = &reg_types_.FromDescriptor(
             GetClassLoader(),
             dex_file_->StringByTypeIdx(class_idx),
@@ -3950,13 +4007,17 @@
         }
       }
     }
-    sig_registers = 1;
   }
 
+  uint32_t arg[5];
+  if (!is_range) {
+    inst->GetVarArgs(arg);
+  }
+  uint32_t sig_registers = (method_type == METHOD_STATIC) ? 0 : 1;
   for ( ; it->HasNext(); it->Next()) {
     if (sig_registers >= expected_args) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
-          " arguments, found " << sig_registers << " or more.";
+          " argument registers, method signature has " << sig_registers + 1 << " or more";
       return nullptr;
     }
 
@@ -3969,7 +4030,7 @@
     }
 
     const RegType& reg_type = reg_types_.FromDescriptor(GetClassLoader(), param_descriptor, false);
-    uint32_t get_reg = is_range ? inst->VRegC_3rc() + static_cast<uint32_t>(sig_registers) :
+    uint32_t get_reg = is_range ? inst->VRegC() + static_cast<uint32_t>(sig_registers) :
         arg[sig_registers];
     if (reg_type.IsIntegralTypes()) {
       const RegType& src_type = work_line_->GetRegisterType(this, get_reg);
@@ -4005,7 +4066,7 @@
   }
   if (expected_args != sig_registers) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << expected_args <<
-        " arguments, found " << sig_registers;
+        " argument registers, method signature has " << sig_registers;
     return nullptr;
   }
   return res_method;
@@ -4017,11 +4078,10 @@
   // As the method may not have been resolved, make this static check against what we expect.
   // The main reason for this code block is to fail hard when we find an illegal use, e.g.,
   // wrong number of arguments or wrong primitive types, even if the method could not be resolved.
-  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  const uint32_t method_idx = inst->VRegB();
   DexFileParameterIterator it(*dex_file_,
                               dex_file_->GetProtoId(dex_file_->GetMethodId(method_idx).proto_idx_));
-  VerifyInvocationArgsFromIterator<DexFileParameterIterator>(&it, inst, method_type, is_range,
-                                                             nullptr);
+  VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, nullptr);
 }
 
 class MethodParamListDescriptorIterator {
@@ -4054,8 +4114,7 @@
     const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
-  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-
+  const uint32_t method_idx = inst->VRegB();
   ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
@@ -4068,7 +4127,7 @@
   // If we're using invoke-super(method), make sure that the executing method's class' superclass
   // has a vtable entry for the target method. Or the target is on a interface.
   if (method_type == METHOD_SUPER) {
-    uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
+    dex::TypeIndex class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
     const RegType& reference_type = reg_types_.FromDescriptor(
         GetClassLoader(),
         dex_file_->StringByTypeIdx(class_idx),
@@ -4118,10 +4177,84 @@
     }
   }
 
-  // Process the target method's signature. This signature may or may not
-  MethodParamListDescriptorIterator it(res_method);
-  return VerifyInvocationArgsFromIterator<MethodParamListDescriptorIterator>(&it, inst, method_type,
-                                                                             is_range, res_method);
+  if (method_type == METHOD_POLYMORPHIC) {
+    // Process the signature of the calling site that is invoking the method handle.
+    DexFileParameterIterator it(*dex_file_, dex_file_->GetProtoId(inst->VRegH()));
+    return VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, res_method);
+  } else {
+    // Process the target method's signature.
+    MethodParamListDescriptorIterator it(res_method);
+    return VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, res_method);
+  }
+}
+
+bool MethodVerifier::CheckSignaturePolymorphicMethod(ArtMethod* method) {
+  mirror::Class* klass = method->GetDeclaringClass();  // Must be MethodHandle itself.
+  if (klass != mirror::MethodHandle::StaticClass()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method must be declared in java.lang.invoke.MethodHandle";
+    return false;
+  }
+  // Only the names "invoke" and "invokeExact" are accepted.
+  const char* method_name = method->GetName();
+  if (strcmp(method_name, "invoke") != 0 && strcmp(method_name, "invokeExact") != 0) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method name invalid: " << method_name;
+    return false;
+  }
+  // Exactly one declared parameter is allowed (assumes the type list is non-null - TODO confirm).
+  const DexFile::TypeList* types = method->GetParameterTypeList();
+  if (types->Size() != 1) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method has too many arguments " << types->Size() << " != 1";
+    return false;
+  }
+  // That single parameter must have the descriptor of Object[].
+  const dex::TypeIndex argument_type_index = types->GetTypeItem(0).type_idx_;
+  const char* argument_descriptor = method->GetTypeDescriptorFromTypeIdx(argument_type_index);
+  if (strcmp(argument_descriptor, "[Ljava/lang/Object;") != 0) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method has unexpected argument type: " << argument_descriptor;
+    return false;
+  }
+  // The declared return type must have the descriptor of Object.
+  const char* return_descriptor = method->GetReturnTypeDescriptor();
+  if (strcmp(return_descriptor, "Ljava/lang/Object;") != 0) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method has unexpected return type: " << return_descriptor;
+    return false;
+  }
+  // Every check passed; no failure has been recorded by this function.
+  return true;
+}
+
+bool MethodVerifier::CheckSignaturePolymorphicReceiver(const Instruction* inst) {
+  const RegType& receiver = work_line_->GetInvocationThis(this, inst);
+  if (receiver.IsZero()) {
+    // A null receiver verifies fine; the invoke itself will fail at run time.
+    return true;
+  }
+  if (!receiver.IsNonZeroReferenceTypes()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver is not a reference: " << receiver;
+    return false;
+  }
+  if (receiver.IsUninitializedReference()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver is uninitialized: " << receiver;
+    return false;
+  }
+  if (!receiver.HasClass()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver has no class: " << receiver;
+    return false;
+  }
+  if (!receiver.GetClass()->IsSubClass(mirror::MethodHandle::StaticClass())) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver is not a subclass of MethodHandle: " << receiver;
+    return false;
+  }
+  return true;
 }
 
 ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst, RegisterLine* reg_line,
@@ -4131,7 +4264,7 @@
   } else {
     DCHECK_EQ(inst->Opcode(), Instruction::INVOKE_VIRTUAL_QUICK);
   }
-  const RegType& actual_arg_type = reg_line->GetInvocationThis(this, inst, is_range, allow_failure);
+  const RegType& actual_arg_type = reg_line->GetInvocationThis(this, inst, allow_failure);
   if (!actual_arg_type.HasClass()) {
     VLOG(verifier) << "Failed to get mirror::Class* from '" << actual_arg_type << "'";
     return nullptr;
@@ -4193,7 +4326,7 @@
   // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
   // match the call to the signature. Also, we might be calling through an abstract method
   // definition (which doesn't have register count values).
-  const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst, is_range);
+  const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst);
   if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
     return nullptr;
   }
@@ -4277,16 +4410,16 @@
 }
 
 void MethodVerifier::VerifyNewArray(const Instruction* inst, bool is_filled, bool is_range) {
-  uint32_t type_idx;
+  dex::TypeIndex type_idx;
   if (!is_filled) {
     DCHECK_EQ(inst->Opcode(), Instruction::NEW_ARRAY);
-    type_idx = inst->VRegC_22c();
+    type_idx = dex::TypeIndex(inst->VRegC_22c());
   } else if (!is_range) {
     DCHECK_EQ(inst->Opcode(), Instruction::FILLED_NEW_ARRAY);
-    type_idx = inst->VRegB_35c();
+    type_idx = dex::TypeIndex(inst->VRegB_35c());
   } else {
     DCHECK_EQ(inst->Opcode(), Instruction::FILLED_NEW_ARRAY_RANGE);
-    type_idx = inst->VRegB_3rc();
+    type_idx = dex::TypeIndex(inst->VRegB_3rc());
   }
   const RegType& res_type = ResolveClassAndCheckAccess(type_idx);
   if (res_type.IsConflict()) {  // bad class
@@ -5001,7 +5134,7 @@
     if (return_type_ == nullptr) {
       const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
       const DexFile::ProtoId& proto_id = dex_file_->GetMethodPrototype(method_id);
-      uint16_t return_type_idx = proto_id.return_type_idx_;
+      dex::TypeIndex return_type_idx = proto_id.return_type_idx_;
       const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(return_type_idx));
       return_type_ = &reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
     }
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index eb8b7a6..fa5a698 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -27,6 +27,7 @@
 #include "base/stl_util.h"
 #include "base/value_object.h"
 #include "dex_file.h"
+#include "dex_file_types.h"
 #include "handle.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
@@ -62,7 +63,8 @@
   METHOD_STATIC,      // static
   METHOD_VIRTUAL,     // virtual
   METHOD_SUPER,       // super
-  METHOD_INTERFACE    // interface
+  METHOD_INTERFACE,   // interface
+  METHOD_POLYMORPHIC  // polymorphic
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
 
@@ -261,7 +263,7 @@
     return have_any_pending_runtime_throw_failure_;
   }
 
-  const RegType& ResolveCheckedClass(uint32_t class_idx)
+  const RegType& ResolveCheckedClass(dex::TypeIndex class_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
   // Returns the method of a quick invoke or null if it cannot be found.
   ArtMethod* GetQuickInvokedMethod(const Instruction* inst, RegisterLine* reg_line,
@@ -318,7 +320,7 @@
   void PrependToLastFailMessage(std::string);
 
   // Adds the given string to the end of the last failure message.
-  void AppendToLastFailMessage(std::string);
+  void AppendToLastFailMessage(const std::string& append);
 
   // Verification result for method(s). Includes a (maximum) failure kind, and (the union of)
   // all failure types.
@@ -471,18 +473,22 @@
 
   // Perform static checks on a "new-instance" instruction. Specifically, make sure the class
   // reference isn't for an array class.
-  bool CheckNewInstance(uint32_t idx);
+  bool CheckNewInstance(dex::TypeIndex idx);
+
+  // Perform static checks on a prototype indexing instruction. All we do here is ensure that the
+  // prototype index is in the valid range.
+  bool CheckPrototypeIndex(uint32_t idx);
 
   /* Ensure that the string index is in the valid range. */
   bool CheckStringIndex(uint32_t idx);
 
   // Perform static checks on an instruction that takes a class constant. Ensure that the class
   // index is in the valid range.
-  bool CheckTypeIndex(uint32_t idx);
+  bool CheckTypeIndex(dex::TypeIndex idx);
 
   // Perform static checks on a "new-array" instruction. Specifically, make sure they aren't
   // creating an array of arrays that causes the number of dimensions to exceed 255.
-  bool CheckNewArray(uint32_t idx);
+  bool CheckNewArray(dex::TypeIndex idx);
 
   // Verify an array data table. "cur_offset" is the offset of the fill-array-data instruction.
   bool CheckArrayData(uint32_t cur_offset);
@@ -511,6 +517,12 @@
   // - vA holds word count, vC holds index of first reg.
   bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
 
+  // Checks the method matches the expectations required to be signature polymorphic.
+  bool CheckSignaturePolymorphicMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Checks the invoked receiver matches the expectations for signature polymorphic methods.
+  bool CheckSignaturePolymorphicReceiver(const Instruction* inst) REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Extract the relative offset from a branch instruction.
   // Returns "false" on failure (e.g. this isn't a branch instruction).
   bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
@@ -625,7 +637,7 @@
 
   // Resolves a class based on an index and performs access checks to ensure the referrer can
   // access the resolved class.
-  const RegType& ResolveClassAndCheckAccess(uint32_t class_idx)
+  const RegType& ResolveClassAndCheckAccess(dex::TypeIndex class_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 4ec2da6..a6088aa 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -44,8 +44,9 @@
 }
 
 const RegType& RegisterLine::GetInvocationThis(MethodVerifier* verifier, const Instruction* inst,
-                                               bool is_range, bool allow_failure) {
-  const size_t args_count = is_range ? inst->VRegA_3rc() : inst->VRegA_35c();
+                                               bool allow_failure) {
+  DCHECK(inst->IsInvoke());
+  const size_t args_count = inst->VRegA();
   if (args_count < 1) {
     if (!allow_failure) {
       verifier->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke lacks 'this'";
@@ -53,7 +54,7 @@
     return verifier->GetRegTypeCache()->Conflict();
   }
   /* Get the element type of the array held in vsrc */
-  const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+  const uint32_t this_reg = inst->VRegC();
   const RegType& this_type = GetRegisterType(verifier, this_reg);
   if (!this_type.IsReferenceTypes()) {
     if (!allow_failure) {
@@ -411,7 +412,7 @@
   }
 
   // Scan the map for the same value.
-  for (const std::pair<uint32_t, uint32_t>& pair : search_map) {
+  for (const std::pair<const uint32_t, uint32_t>& pair : search_map) {
     if (pair.first != src && pair.second == src_lock_levels) {
       return true;
     }
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 7603a79..221aa80 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -217,7 +217,6 @@
    */
   const RegType& GetInvocationThis(MethodVerifier* verifier,
                                    const Instruction* inst,
-                                   bool is_range,
                                    bool allow_failure = false)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 4d1e337..f9bff23 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -16,6 +16,9 @@
 
 #include "verifier_deps.h"
 
+#include <cstring>
+
+#include "base/stl_util.h"
 #include "compiler_callbacks.h"
 #include "leb128.h"
 #include "mirror/class-inl.h"
@@ -26,7 +29,6 @@
 namespace verifier {
 
 VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files) {
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
   for (const DexFile* dex_file : dex_files) {
     DCHECK(GetDexFileDeps(*dex_file) == nullptr);
     std::unique_ptr<DexFileDeps> deps(new DexFileDeps());
@@ -34,11 +36,38 @@
   }
 }
 
+void VerifierDeps::MergeWith(const VerifierDeps& other,
+                             const std::vector<const DexFile*>& dex_files) {
+  DCHECK_EQ(dex_deps_.size(), other.dex_deps_.size());
+  for (const DexFile* dex_file : dex_files) {
+    DexFileDeps* const mine = GetDexFileDeps(*dex_file);
+    const DexFileDeps& theirs = *other.GetDexFileDeps(*dex_file);
+    // Extra strings are collected only on the main `VerifierDeps`, which is expected
+    // to be `this`; the deps being folded in must therefore carry none of their own.
+    DCHECK(theirs.strings_.empty());
+    MergeSets(mine->assignable_types_, theirs.assignable_types_);
+    MergeSets(mine->unassignable_types_, theirs.unassignable_types_);
+    MergeSets(mine->classes_, theirs.classes_);
+    MergeSets(mine->fields_, theirs.fields_);
+    MergeSets(mine->direct_methods_, theirs.direct_methods_);
+    MergeSets(mine->virtual_methods_, theirs.virtual_methods_);
+    MergeSets(mine->interface_methods_, theirs.interface_methods_);
+    mine->unverified_classes_.insert(mine->unverified_classes_.end(),
+                                     theirs.unverified_classes_.begin(),
+                                     theirs.unverified_classes_.end());
+  }
+}
+
 VerifierDeps::DexFileDeps* VerifierDeps::GetDexFileDeps(const DexFile& dex_file) {
   auto it = dex_deps_.find(&dex_file);
   return (it == dex_deps_.end()) ? nullptr : it->second.get();
 }
 
+const VerifierDeps::DexFileDeps* VerifierDeps::GetDexFileDeps(const DexFile& dex_file) const {
+  auto it = dex_deps_.find(&dex_file);  // Entries are keyed by the DexFile's address.
+  return (it == dex_deps_.end()) ? nullptr : it->second.get();  // Null: no entry for dex_file.
+}
+
 template <typename T>
 uint16_t VerifierDeps::GetAccessFlags(T* element) {
   static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
@@ -51,20 +80,117 @@
   }
 }
 
-template <typename T>
-uint32_t VerifierDeps::GetDeclaringClassStringId(const DexFile& dex_file, T* element) {
-  static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
-  if (element == nullptr) {
-    return VerifierDeps::kUnresolvedMarker;
-  } else {
-    std::string temp;
-    uint32_t string_id = GetIdFromString(
-        dex_file, element->GetDeclaringClass()->GetDescriptor(&temp));
-    return string_id;
+dex::StringIndex VerifierDeps::GetClassDescriptorStringId(const DexFile& dex_file,
+                                                          ObjPtr<mirror::Class> klass) {
+  DCHECK(klass != nullptr);
+  ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
+  // Array and proxy classes do not have a dex cache.
+  if (!klass->IsArrayClass() && !klass->IsProxyClass()) {
+    DCHECK(dex_cache != nullptr) << klass->PrettyClass();
+    if (dex_cache->GetDexFile() == &dex_file) {  // Class is defined in `dex_file` itself.
+      // FindStringId is slow, try to go through the class def if we have one.
+      const DexFile::ClassDef* class_def = klass->GetClassDef();
+      DCHECK(class_def != nullptr) << klass->PrettyClass();
+      const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def->class_idx_);
+      if (kIsDebugBuild) {  // Cross-check the shortcut against the descriptor lookup.
+        std::string temp;
+        CHECK_EQ(GetIdFromString(dex_file, klass->GetDescriptor(&temp)), type_id.descriptor_idx_);
+      }
+      return type_id.descriptor_idx_;
+    }
   }
+  std::string temp;  // Scratch storage handed to GetDescriptor().
+  return GetIdFromString(dex_file, klass->GetDescriptor(&temp));  // Slow path: by descriptor.
 }
 
-uint32_t VerifierDeps::GetIdFromString(const DexFile& dex_file, const std::string& str) {
+// Returns type_idx's descriptor string id if it spells klass's descriptor, Invalid() otherwise.
+static dex::StringIndex TryGetClassDescriptorStringId(const DexFile& dex_file,
+                                                      dex::TypeIndex type_idx,
+                                                      ObjPtr<mirror::Class> klass)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (!klass->IsArrayClass() && !klass->IsProxyClass()) {  // Those are left to the slow path.
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(type_idx);
+    const DexFile& klass_dex = klass->GetDexFile();
+    const DexFile::TypeId& klass_type_id = klass_dex.GetTypeId(klass->GetClassDef()->class_idx_);
+    if (strcmp(dex_file.GetTypeDescriptor(type_id),
+               klass_dex.GetTypeDescriptor(klass_type_id)) == 0) {
+      return type_id.descriptor_idx_;
+    }
+  }
+  return dex::StringIndex::Invalid();
+}
+
+dex::StringIndex VerifierDeps::GetMethodDeclaringClassStringId(const DexFile& dex_file,
+                                                               uint32_t dex_method_index,
+                                                               ArtMethod* method) {
+  static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
+  if (method == nullptr) {  // No method to inspect: encode kUnresolvedMarker instead.
+    return dex::StringIndex(VerifierDeps::kUnresolvedMarker);
+  }
+  const dex::StringIndex string_id = TryGetClassDescriptorStringId(
+      dex_file,
+      dex_file.GetMethodId(dex_method_index).class_idx_,
+      method->GetDeclaringClass());
+  if (string_id.IsValid()) {
+    // The method id's class_idx_ already has the declaring class's descriptor; use its string id.
+    DCHECK_EQ(GetClassDescriptorStringId(dex_file, method->GetDeclaringClass()), string_id);
+    return string_id;
+  }
+  return GetClassDescriptorStringId(dex_file, method->GetDeclaringClass());
+}
+
+dex::StringIndex VerifierDeps::GetFieldDeclaringClassStringId(const DexFile& dex_file,
+                                                              uint32_t dex_field_idx,
+                                                              ArtField* field) {
+  static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
+  if (field == nullptr) {  // No field to inspect: encode kUnresolvedMarker instead.
+    return dex::StringIndex(VerifierDeps::kUnresolvedMarker);
+  }
+  const dex::StringIndex string_id = TryGetClassDescriptorStringId(
+      dex_file,
+      dex_file.GetFieldId(dex_field_idx).class_idx_,
+      field->GetDeclaringClass());
+  if (string_id.IsValid()) {
+    // The field id's class_idx_ already has the declaring class's descriptor; use its string id.
+    DCHECK_EQ(GetClassDescriptorStringId(dex_file, field->GetDeclaringClass()), string_id);
+    return string_id;
+  }
+  return GetClassDescriptorStringId(dex_file, field->GetDeclaringClass());
+}
+
+static inline VerifierDeps* GetMainVerifierDeps() {
+  // The main VerifierDeps hangs off the compiler callbacks; by the end of
+  // verification every per-thread VerifierDeps has been merged into it.
+  CompilerCallbacks* const cb = Runtime::Current()->GetCompilerCallbacks();
+  if (cb != nullptr) {
+    return cb->GetVerifierDeps();
+  }
+  return nullptr;
+}
+
+static inline VerifierDeps* GetThreadLocalVerifierDeps() {
+  // AOT compilation gives each thread a private VerifierDeps to avoid lock contention;
+  // these are merged into the main one once full verification has finished.
+  if (Runtime::Current()->IsAotCompiler()) {
+    return Thread::Current()->GetVerifierDeps();
+  }
+  return nullptr;
+}
+
+static bool FindExistingStringId(const std::vector<std::string>& strings,
+                                 const std::string& str,
+                                 uint32_t* found_id) {
+  // Linear search; on a match, store the index of the first equal entry in *found_id.
+  for (uint32_t idx = 0, count = strings.size(); idx < count; ++idx) {
+    if (strings[idx] == str) {
+      *found_id = idx;
+      return true;
+    }
+  }
+  return false;
+}
+
+dex::StringIndex VerifierDeps::GetIdFromString(const DexFile& dex_file, const std::string& str) {
   const DexFile::StringId* string_id = dex_file.FindStringId(str.c_str());
   if (string_id != nullptr) {
     // String is in the DEX file. Return its ID.
@@ -74,41 +200,49 @@
   // String is not in the DEX file. Assign a new ID to it which is higher than
   // the number of strings in the DEX file.
 
-  DexFileDeps* deps = GetDexFileDeps(dex_file);
+  // We use the main `VerifierDeps` for adding new strings to simplify
+  // synchronization/merging of these entries between threads.
+  VerifierDeps* singleton = GetMainVerifierDeps();
+  DexFileDeps* deps = singleton->GetDexFileDeps(dex_file);
   DCHECK(deps != nullptr);
 
   uint32_t num_ids_in_dex = dex_file.NumStringIds();
-  uint32_t num_extra_ids = deps->strings_.size();
+  uint32_t found_id;
 
-  for (size_t i = 0; i < num_extra_ids; ++i) {
-    if (deps->strings_[i] == str) {
-      return num_ids_in_dex + i;
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+    if (FindExistingStringId(deps->strings_, str, &found_id)) {
+      return dex::StringIndex(num_ids_in_dex + found_id);
     }
   }
-
-  deps->strings_.push_back(str);
-
-  uint32_t new_id = num_ids_in_dex + num_extra_ids;
-  CHECK_GE(new_id, num_ids_in_dex);  // check for overflows
-  DCHECK_EQ(str, GetStringFromId(dex_file, new_id));
-
-  return new_id;
-}
-
-std::string VerifierDeps::GetStringFromId(const DexFile& dex_file, uint32_t string_id) {
-  uint32_t num_ids_in_dex = dex_file.NumStringIds();
-  if (string_id < num_ids_in_dex) {
-    return std::string(dex_file.StringDataByIdx(string_id));
-  } else {
-    DexFileDeps* deps = GetDexFileDeps(dex_file);
-    DCHECK(deps != nullptr);
-    string_id -= num_ids_in_dex;
-    CHECK_LT(string_id, deps->strings_.size());
-    return deps->strings_[string_id];
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+    if (FindExistingStringId(deps->strings_, str, &found_id)) {
+      return dex::StringIndex(num_ids_in_dex + found_id);
+    }
+    deps->strings_.push_back(str);
+    dex::StringIndex new_id(num_ids_in_dex + deps->strings_.size() - 1);
+    CHECK_GE(new_id.index_, num_ids_in_dex);  // check for overflows
+    DCHECK_EQ(str, singleton->GetStringFromId(dex_file, new_id));
+    return new_id;
   }
 }
 
-bool VerifierDeps::IsInClassPath(ObjPtr<mirror::Class> klass) {
+std::string VerifierDeps::GetStringFromId(const DexFile& dex_file, dex::StringIndex string_id)
+    const {
+  uint32_t num_ids_in_dex = dex_file.NumStringIds();
+  if (string_id.index_ < num_ids_in_dex) {
+    return std::string(dex_file.StringDataByIdx(string_id));
+  } else {
+    const DexFileDeps* deps = GetDexFileDeps(dex_file);
+    DCHECK(deps != nullptr);
+    string_id.index_ -= num_ids_in_dex;
+    CHECK_LT(string_id.index_, deps->strings_.size());
+    return deps->strings_[string_id.index_];
+  }
+}
+
+bool VerifierDeps::IsInClassPath(ObjPtr<mirror::Class> klass) const {
   DCHECK(klass != nullptr);
 
   ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
@@ -132,7 +266,7 @@
 }
 
 void VerifierDeps::AddClassResolution(const DexFile& dex_file,
-                                      uint16_t type_idx,
+                                      dex::TypeIndex type_idx,
                                       mirror::Class* klass) {
   DexFileDeps* dex_deps = GetDexFileDeps(dex_file);
   if (dex_deps == nullptr) {
@@ -146,7 +280,6 @@
     return;
   }
 
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
   dex_deps->classes_.emplace(ClassResolution(type_idx, GetAccessFlags(klass)));
 }
 
@@ -165,9 +298,11 @@
     return;
   }
 
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
-  dex_deps->fields_.emplace(FieldResolution(
-      field_idx, GetAccessFlags(field), GetDeclaringClassStringId(dex_file, field)));
+  dex_deps->fields_.emplace(FieldResolution(field_idx,
+                                            GetAccessFlags(field),
+                                            GetFieldDeclaringClassStringId(dex_file,
+                                                                           field_idx,
+                                                                           field)));
 }
 
 void VerifierDeps::AddMethodResolution(const DexFile& dex_file,
@@ -186,10 +321,9 @@
     return;
   }
 
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
   MethodResolution method_tuple(method_idx,
                                 GetAccessFlags(method),
-                                GetDeclaringClassStringId(dex_file, method));
+                                GetMethodDeclaringClassStringId(dex_file, method_idx, method));
   if (resolution_kind == kDirectMethodResolution) {
     dex_deps->direct_methods_.emplace(method_tuple);
   } else if (resolution_kind == kVirtualMethodResolution) {
@@ -255,15 +389,9 @@
     return;
   }
 
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
-
   // Get string IDs for both descriptors and store in the appropriate set.
-
-  std::string temp1, temp2;
-  std::string destination_desc(destination->GetDescriptor(&temp1));
-  std::string source_desc(source->GetDescriptor(&temp2));
-  uint32_t destination_id = GetIdFromString(dex_file, destination_desc);
-  uint32_t source_id = GetIdFromString(dex_file, source_desc);
+  dex::StringIndex destination_id = GetClassDescriptorStringId(dex_file, destination);
+  dex::StringIndex source_id = GetClassDescriptorStringId(dex_file, source);
 
   if (is_assignable) {
     dex_deps->assignable_types_.emplace(TypeAssignability(destination_id, source_id));
@@ -272,45 +400,36 @@
   }
 }
 
-static inline VerifierDeps* GetVerifierDepsSingleton() {
-  CompilerCallbacks* callbacks = Runtime::Current()->GetCompilerCallbacks();
-  if (callbacks == nullptr) {
-    return nullptr;
-  }
-  return callbacks->GetVerifierDeps();
-}
-
 void VerifierDeps::MaybeRecordVerificationStatus(const DexFile& dex_file,
-                                                 uint16_t type_idx,
+                                                 dex::TypeIndex type_idx,
                                                  MethodVerifier::FailureKind failure_kind) {
   if (failure_kind == MethodVerifier::kNoFailure) {
     // We only record classes that did not fully verify at compile time.
     return;
   }
 
-  VerifierDeps* singleton = GetVerifierDepsSingleton();
-  if (singleton != nullptr) {
-    DexFileDeps* dex_deps = singleton->GetDexFileDeps(dex_file);
-    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+  VerifierDeps* thread_deps = GetThreadLocalVerifierDeps();
+  if (thread_deps != nullptr) {
+    DexFileDeps* dex_deps = thread_deps->GetDexFileDeps(dex_file);
     dex_deps->unverified_classes_.push_back(type_idx);
   }
 }
 
 void VerifierDeps::MaybeRecordClassResolution(const DexFile& dex_file,
-                                              uint16_t type_idx,
+                                              dex::TypeIndex type_idx,
                                               mirror::Class* klass) {
-  VerifierDeps* singleton = GetVerifierDepsSingleton();
-  if (singleton != nullptr) {
-    singleton->AddClassResolution(dex_file, type_idx, klass);
+  VerifierDeps* thread_deps = GetThreadLocalVerifierDeps();
+  if (thread_deps != nullptr) {
+    thread_deps->AddClassResolution(dex_file, type_idx, klass);
   }
 }
 
 void VerifierDeps::MaybeRecordFieldResolution(const DexFile& dex_file,
                                               uint32_t field_idx,
                                               ArtField* field) {
-  VerifierDeps* singleton = GetVerifierDepsSingleton();
-  if (singleton != nullptr) {
-    singleton->AddFieldResolution(dex_file, field_idx, field);
+  VerifierDeps* thread_deps = GetThreadLocalVerifierDeps();
+  if (thread_deps != nullptr) {
+    thread_deps->AddFieldResolution(dex_file, field_idx, field);
   }
 }
 
@@ -318,9 +437,9 @@
                                                uint32_t method_idx,
                                                MethodResolutionKind resolution_kind,
                                                ArtMethod* method) {
-  VerifierDeps* singleton = GetVerifierDepsSingleton();
-  if (singleton != nullptr) {
-    singleton->AddMethodResolution(dex_file, method_idx, resolution_kind, method);
+  VerifierDeps* thread_deps = GetThreadLocalVerifierDeps();
+  if (thread_deps != nullptr) {
+    thread_deps->AddMethodResolution(dex_file, method_idx, resolution_kind, method);
   }
 }
 
@@ -329,42 +448,74 @@
                                             mirror::Class* source,
                                             bool is_strict,
                                             bool is_assignable) {
-  VerifierDeps* singleton = GetVerifierDepsSingleton();
-  if (singleton != nullptr) {
-    singleton->AddAssignability(dex_file, destination, source, is_strict, is_assignable);
+  VerifierDeps* thread_deps = GetThreadLocalVerifierDeps();
+  if (thread_deps != nullptr) {
+    thread_deps->AddAssignability(dex_file, destination, source, is_strict, is_assignable);
   }
 }
 
+namespace {
+
 static inline uint32_t DecodeUint32WithOverflowCheck(const uint8_t** in, const uint8_t* end) {
   CHECK_LT(*in, end);
   return DecodeUnsignedLeb128(in);
 }
 
+template<typename T> inline uint32_t Encode(T in);
+
+template<> inline uint32_t Encode<uint16_t>(uint16_t in) {
+  return in;
+}
+template<> inline uint32_t Encode<uint32_t>(uint32_t in) {
+  return in;
+}
+template<> inline uint32_t Encode<dex::TypeIndex>(dex::TypeIndex in) {
+  return in.index_;
+}
+template<> inline uint32_t Encode<dex::StringIndex>(dex::StringIndex in) {
+  return in.index_;
+}
+
+template<typename T> inline T Decode(uint32_t in);
+
+template<> inline uint16_t Decode<uint16_t>(uint32_t in) {
+  return dchecked_integral_cast<uint16_t>(in);
+}
+template<> inline uint32_t Decode<uint32_t>(uint32_t in) {
+  return in;
+}
+template<> inline dex::TypeIndex Decode<dex::TypeIndex>(uint32_t in) {
+  return dex::TypeIndex(in);
+}
+template<> inline dex::StringIndex Decode<dex::StringIndex>(uint32_t in) {
+  return dex::StringIndex(in);
+}
+
 template<typename T1, typename T2>
 static inline void EncodeTuple(std::vector<uint8_t>* out, const std::tuple<T1, T2>& t) {
-  EncodeUnsignedLeb128(out, std::get<0>(t));
-  EncodeUnsignedLeb128(out, std::get<1>(t));
+  EncodeUnsignedLeb128(out, Encode(std::get<0>(t)));
+  EncodeUnsignedLeb128(out, Encode(std::get<1>(t)));
 }
 
 template<typename T1, typename T2>
 static inline void DecodeTuple(const uint8_t** in, const uint8_t* end, std::tuple<T1, T2>* t) {
-  T1 v1 = static_cast<T1>(DecodeUint32WithOverflowCheck(in, end));
-  T2 v2 = static_cast<T2>(DecodeUint32WithOverflowCheck(in, end));
+  T1 v1 = Decode<T1>(DecodeUint32WithOverflowCheck(in, end));
+  T2 v2 = Decode<T2>(DecodeUint32WithOverflowCheck(in, end));
   *t = std::make_tuple(v1, v2);
 }
 
 template<typename T1, typename T2, typename T3>
 static inline void EncodeTuple(std::vector<uint8_t>* out, const std::tuple<T1, T2, T3>& t) {
-  EncodeUnsignedLeb128(out, std::get<0>(t));
-  EncodeUnsignedLeb128(out, std::get<1>(t));
-  EncodeUnsignedLeb128(out, std::get<2>(t));
+  EncodeUnsignedLeb128(out, Encode(std::get<0>(t)));
+  EncodeUnsignedLeb128(out, Encode(std::get<1>(t)));
+  EncodeUnsignedLeb128(out, Encode(std::get<2>(t)));
 }
 
 template<typename T1, typename T2, typename T3>
 static inline void DecodeTuple(const uint8_t** in, const uint8_t* end, std::tuple<T1, T2, T3>* t) {
-  T1 v1 = static_cast<T1>(DecodeUint32WithOverflowCheck(in, end));
-  T2 v2 = static_cast<T2>(DecodeUint32WithOverflowCheck(in, end));
-  T3 v3 = static_cast<T2>(DecodeUint32WithOverflowCheck(in, end));
+  T1 v1 = Decode<T1>(DecodeUint32WithOverflowCheck(in, end));
+  T2 v2 = Decode<T2>(DecodeUint32WithOverflowCheck(in, end));
+  T3 v3 = Decode<T3>(DecodeUint32WithOverflowCheck(in, end));
   *t = std::make_tuple(v1, v2, v3);
 }
 
@@ -376,11 +527,12 @@
   }
 }
 
+template <typename T>
 static inline void EncodeUint16Vector(std::vector<uint8_t>* out,
-                                      const std::vector<uint16_t>& vector) {
+                                      const std::vector<T>& vector) {
   EncodeUnsignedLeb128(out, vector.size());
-  for (uint16_t entry : vector) {
-    EncodeUnsignedLeb128(out, entry);
+  for (const T& entry : vector) {
+    EncodeUnsignedLeb128(out, Encode(entry));
   }
 }
 
@@ -395,14 +547,16 @@
   }
 }
 
+template<typename T>
 static inline void DecodeUint16Vector(const uint8_t** in,
                                       const uint8_t* end,
-                                      std::vector<uint16_t>* vector) {
+                                      std::vector<T>* vector) {
   DCHECK(vector->empty());
   size_t num_entries = DecodeUint32WithOverflowCheck(in, end);
   vector->reserve(num_entries);
   for (size_t i = 0; i < num_entries; ++i) {
-    vector->push_back(dchecked_integral_cast<uint16_t>(DecodeUint32WithOverflowCheck(in, end)));
+    vector->push_back(
+        Decode<T>(dchecked_integral_cast<uint16_t>(DecodeUint32WithOverflowCheck(in, end))));
   }
 }
 
@@ -431,42 +585,51 @@
   }
 }
 
-void VerifierDeps::Encode(std::vector<uint8_t>* buffer) const {
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
-  for (auto& entry : dex_deps_) {
-    EncodeStringVector(buffer, entry.second->strings_);
-    EncodeSet(buffer, entry.second->assignable_types_);
-    EncodeSet(buffer, entry.second->unassignable_types_);
-    EncodeSet(buffer, entry.second->classes_);
-    EncodeSet(buffer, entry.second->fields_);
-    EncodeSet(buffer, entry.second->direct_methods_);
-    EncodeSet(buffer, entry.second->virtual_methods_);
-    EncodeSet(buffer, entry.second->interface_methods_);
-    EncodeUint16Vector(buffer, entry.second->unverified_classes_);
+}  // namespace
+
+void VerifierDeps::Encode(const std::vector<const DexFile*>& dex_files,
+                          std::vector<uint8_t>* buffer) const {
+  for (const DexFile* dex_file : dex_files) {
+    const DexFileDeps& deps = *GetDexFileDeps(*dex_file);
+    EncodeStringVector(buffer, deps.strings_);
+    EncodeSet(buffer, deps.assignable_types_);
+    EncodeSet(buffer, deps.unassignable_types_);
+    EncodeSet(buffer, deps.classes_);
+    EncodeSet(buffer, deps.fields_);
+    EncodeSet(buffer, deps.direct_methods_);
+    EncodeSet(buffer, deps.virtual_methods_);
+    EncodeSet(buffer, deps.interface_methods_);
+    EncodeUint16Vector(buffer, deps.unverified_classes_);
   }
 }
 
-VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files, ArrayRef<uint8_t> data)
+VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files,
+                           ArrayRef<const uint8_t> data)
     : VerifierDeps(dex_files) {
+  if (data.empty()) {
+    // Return eagerly, as the first thing we expect from VerifierDeps data is
+    // the number of created strings, even if there is no dependency.
+    // Currently, only the boot image does not have any VerifierDeps data.
+    return;
+  }
   const uint8_t* data_start = data.data();
   const uint8_t* data_end = data_start + data.size();
-  for (auto& entry : dex_deps_) {
-    DecodeStringVector(&data_start, data_end, &entry.second->strings_);
-    DecodeSet(&data_start, data_end, &entry.second->assignable_types_);
-    DecodeSet(&data_start, data_end, &entry.second->unassignable_types_);
-    DecodeSet(&data_start, data_end, &entry.second->classes_);
-    DecodeSet(&data_start, data_end, &entry.second->fields_);
-    DecodeSet(&data_start, data_end, &entry.second->direct_methods_);
-    DecodeSet(&data_start, data_end, &entry.second->virtual_methods_);
-    DecodeSet(&data_start, data_end, &entry.second->interface_methods_);
-    DecodeUint16Vector(&data_start, data_end, &entry.second->unverified_classes_);
+  for (const DexFile* dex_file : dex_files) {
+    DexFileDeps* deps = GetDexFileDeps(*dex_file);
+    DecodeStringVector(&data_start, data_end, &deps->strings_);
+    DecodeSet(&data_start, data_end, &deps->assignable_types_);
+    DecodeSet(&data_start, data_end, &deps->unassignable_types_);
+    DecodeSet(&data_start, data_end, &deps->classes_);
+    DecodeSet(&data_start, data_end, &deps->fields_);
+    DecodeSet(&data_start, data_end, &deps->direct_methods_);
+    DecodeSet(&data_start, data_end, &deps->virtual_methods_);
+    DecodeSet(&data_start, data_end, &deps->interface_methods_);
+    DecodeUint16Vector(&data_start, data_end, &deps->unverified_classes_);
   }
   CHECK_LE(data_start, data_end);
 }
 
 bool VerifierDeps::Equals(const VerifierDeps& rhs) const {
-  MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
-
   if (dex_deps_.size() != rhs.dex_deps_.size()) {
     return false;
   }
@@ -504,5 +667,355 @@
          (unverified_classes_ == rhs.unverified_classes_);
 }
 
+void VerifierDeps::Dump(VariableIndentationOutputStream* vios) const {
+  for (const auto& dep : dex_deps_) {  // One "Dependencies of <location>" section per dex file.
+    const DexFile& dex_file = *dep.first;
+    vios->Stream()
+        << "Dependencies of "
+        << dex_file.GetLocation()
+        << ":\n";
+
+    ScopedIndentation indent(vios);
+
+    for (const std::string& str : dep.second->strings_) {
+      vios->Stream() << "Extra string: " << str << "\n";
+    }
+
+    for (const TypeAssignability& entry : dep.second->assignable_types_) {
+      vios->Stream()
+        << GetStringFromId(dex_file, entry.GetSource())
+        << " must be assignable to "
+        << GetStringFromId(dex_file, entry.GetDestination())
+        << "\n";
+    }
+
+    for (const TypeAssignability& entry : dep.second->unassignable_types_) {
+      vios->Stream()
+        << GetStringFromId(dex_file, entry.GetSource())
+        << " must not be assignable to "
+        << GetStringFromId(dex_file, entry.GetDestination())
+        << "\n";
+    }
+
+    for (const ClassResolution& entry : dep.second->classes_) {
+      vios->Stream()
+          << dex_file.StringByTypeIdx(entry.GetDexTypeIndex())
+          << (entry.IsResolved() ? " must be resolved" : " must not be resolved")
+          << " with access flags " << std::hex << entry.GetAccessFlags() << std::dec
+          << "\n";
+    }
+
+    for (const FieldResolution& entry : dep.second->fields_) {
+      const DexFile::FieldId& field_id = dex_file.GetFieldId(entry.GetDexFieldIndex());
+      vios->Stream()
+          << dex_file.GetFieldDeclaringClassDescriptor(field_id) << "->"
+          << dex_file.GetFieldName(field_id) << ":"
+          << dex_file.GetFieldTypeDescriptor(field_id)
+          << " is expected to be ";
+      if (!entry.IsResolved()) {
+        vios->Stream() << "unresolved\n";
+      } else {
+        vios->Stream()
+          << "in class "
+          << GetStringFromId(dex_file, entry.GetDeclaringClassIndex())
+          << ", and have the access flags " << std::hex << entry.GetAccessFlags() << std::dec
+          << "\n";
+      }
+    }
+
+    for (const auto& entry :  // Pairs each resolution kind with its recorded method set.
+            { std::make_pair(kDirectMethodResolution, dep.second->direct_methods_),
+              std::make_pair(kVirtualMethodResolution, dep.second->virtual_methods_),
+              std::make_pair(kInterfaceMethodResolution, dep.second->interface_methods_) }) {
+      for (const MethodResolution& method : entry.second) {
+        const DexFile::MethodId& method_id = dex_file.GetMethodId(method.GetDexMethodIndex());
+        vios->Stream()
+            << dex_file.GetMethodDeclaringClassDescriptor(method_id) << "->"
+            << dex_file.GetMethodName(method_id)
+            << dex_file.GetMethodSignature(method_id).ToString()
+            << " is expected to be ";
+        if (!method.IsResolved()) {
+          vios->Stream() << "unresolved\n";
+        } else {
+          vios->Stream()
+            << "in class "
+            << GetStringFromId(dex_file, method.GetDeclaringClassIndex())
+            << ", have the access flags " << std::hex << method.GetAccessFlags() << std::dec
+            << ", and be of kind " << entry.first
+            << "\n";
+        }
+      }
+    }
+
+    for (dex::TypeIndex type_index : dep.second->unverified_classes_) {
+      vios->Stream()
+          << dex_file.StringByTypeIdx(type_index)
+          << " is expected to be verified at runtime\n";
+    }
+  }
+}
+
+bool VerifierDeps::ValidateDependencies(Handle<mirror::ClassLoader> class_loader,
+                                        Thread* self) const {
+  for (const auto& entry : dex_deps_) {
+    if (!VerifyDexFile(class_loader, *entry.first, *entry.second, self)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// TODO: share that helper with other parts of the compiler that have
+// the same lookup pattern.
+static mirror::Class* FindClassAndClearException(ClassLinker* class_linker,
+                                                 Thread* self,
+                                                 const char* name,
+                                                 Handle<mirror::ClassLoader> class_loader)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::Class* result = class_linker->FindClass(self, name, class_loader);
+  if (result == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+  }
+  return result;
+}
+
+bool VerifierDeps::VerifyAssignability(Handle<mirror::ClassLoader> class_loader,
+                                       const DexFile& dex_file,
+                                       const std::set<TypeAssignability>& assignables,
+                                       bool expected_assignability,
+                                       Thread* self) const {
+  StackHandleScope<2> hs(self);  // Two handles: `source` and `destination`.
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::Class> source(hs.NewHandle<mirror::Class>(nullptr));
+  MutableHandle<mirror::Class> destination(hs.NewHandle<mirror::Class>(nullptr));
+
+  for (const auto& entry : assignables) {  // Returns false on the first entry that fails.
+    const std::string& destination_desc = GetStringFromId(dex_file, entry.GetDestination());
+    destination.Assign(
+        FindClassAndClearException(class_linker, self, destination_desc.c_str(), class_loader));
+    const std::string& source_desc = GetStringFromId(dex_file, entry.GetSource());
+    source.Assign(
+        FindClassAndClearException(class_linker, self, source_desc.c_str(), class_loader));
+
+    if (destination.Get() == nullptr) {
+      LOG(INFO) << "VerifierDeps: Could not resolve class " << destination_desc;
+      return false;
+    }
+
+    if (source.Get() == nullptr) {
+      LOG(INFO) << "VerifierDeps: Could not resolve class " << source_desc;
+      return false;
+    }
+
+    DCHECK(destination->IsResolved() && source->IsResolved());
+    if (destination->IsAssignableFrom(source.Get()) != expected_assignability) {
+      LOG(INFO) << "VerifierDeps: Class "
+                << destination_desc
+                << (expected_assignability ? " not " : " ")  // Reports the observed outcome.
+                << "assignable from "
+                << source_desc;
+      return false;
+    }
+  }
+  return true;  // Every recorded pair matched `expected_assignability`.
+}
+
+bool VerifierDeps::VerifyClasses(Handle<mirror::ClassLoader> class_loader,
+                                 const DexFile& dex_file,
+                                 const std::set<ClassResolution>& classes,
+                                 Thread* self) const {
+  StackHandleScope<1> hs(self);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+  for (const auto& entry : classes) {
+    const char* descriptor = dex_file.StringByTypeIdx(entry.GetDexTypeIndex());
+    cls.Assign(FindClassAndClearException(class_linker, self, descriptor, class_loader));
+
+    if (entry.IsResolved()) {
+      if (cls.Get() == nullptr) {
+        LOG(INFO) << "VerifierDeps: Could not resolve class " << descriptor;
+        return false;
+      } else if (entry.GetAccessFlags() != GetAccessFlags(cls.Get())) {
+        LOG(INFO) << "VerifierDeps: Unexpected access flags on class "
+                  << descriptor
+                  << std::hex
+                  << " (expected="
+                  << entry.GetAccessFlags()
+                  << ", actual="
+                  << GetAccessFlags(cls.Get()) << ")"
+                  << std::dec;
+        return false;
+      }
+    } else if (cls.Get() != nullptr) {
+      LOG(INFO) << "VerifierDeps: Unexpected successful resolution of class " << descriptor;
+      return false;
+    }
+  }
+  return true;
+}
+
+static std::string GetFieldDescription(const DexFile& dex_file, uint32_t index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(index);
+  return std::string(dex_file.GetFieldDeclaringClassDescriptor(field_id))
+      + "->"
+      + dex_file.GetFieldName(field_id)
+      + ":"
+      + dex_file.GetFieldTypeDescriptor(field_id);
+}
+
+bool VerifierDeps::VerifyFields(Handle<mirror::ClassLoader> class_loader,
+                                const DexFile& dex_file,
+                                const std::set<FieldResolution>& fields,
+                                Thread* self) const {
+  // Check recorded fields are resolved the same way, have the same recorded class,
+  // and have the same recorded flags.
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::DexCache> dex_cache(
+      hs.NewHandle(class_linker->FindDexCache(self, dex_file, /* allow_failure */ false)));
+  for (const auto& entry : fields) {
+    ArtField* field = class_linker->ResolveFieldJLS(
+        dex_file, entry.GetDexFieldIndex(), dex_cache, class_loader);
+
+    if (field == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      self->ClearException();
+    }
+
+    if (entry.IsResolved()) {
+      std::string expected_decl_klass = GetStringFromId(dex_file, entry.GetDeclaringClassIndex());
+      std::string temp;
+      if (field == nullptr) {
+        LOG(INFO) << "VerifierDeps: Could not resolve field "
+                  << GetFieldDescription(dex_file, entry.GetDexFieldIndex());
+        return false;
+      } else if (expected_decl_klass != field->GetDeclaringClass()->GetDescriptor(&temp)) {
+        LOG(INFO) << "VerifierDeps: Unexpected declaring class for field resolution "
+                  << GetFieldDescription(dex_file, entry.GetDexFieldIndex())
+                  << " (expected=" << expected_decl_klass
+                  << ", actual=" << field->GetDeclaringClass()->GetDescriptor(&temp) << ")";
+        return false;
+      } else if (entry.GetAccessFlags() != GetAccessFlags(field)) {
+        LOG(INFO) << "VerifierDeps: Unexpected access flags for resolved field "
+                  << GetFieldDescription(dex_file, entry.GetDexFieldIndex())
+                  << std::hex << " (expected=" << entry.GetAccessFlags()
+                  << ", actual=" << GetAccessFlags(field) << ")" << std::dec;
+        return false;
+      }
+    } else if (field != nullptr) {
+      LOG(INFO) << "VerifierDeps: Unexpected successful resolution of field "
+                << GetFieldDescription(dex_file, entry.GetDexFieldIndex());
+      return false;
+    }
+  }
+  return true;
+}
+
+static std::string GetMethodDescription(const DexFile& dex_file, uint32_t index) {
+  const DexFile::MethodId& method_id = dex_file.GetMethodId(index);
+  return std::string(dex_file.GetMethodDeclaringClassDescriptor(method_id))
+      + "->"
+      + dex_file.GetMethodName(method_id)
+      + dex_file.GetMethodSignature(method_id).ToString();
+}
+
+bool VerifierDeps::VerifyMethods(Handle<mirror::ClassLoader> class_loader,
+                                 const DexFile& dex_file,
+                                 const std::set<MethodResolution>& methods,
+                                 MethodResolutionKind kind,
+                                 Thread* self) const {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+
+  for (const auto& entry : methods) {
+    const DexFile::MethodId& method_id = dex_file.GetMethodId(entry.GetDexMethodIndex());
+
+    const char* name = dex_file.GetMethodName(method_id);
+    const Signature signature = dex_file.GetMethodSignature(method_id);
+    const char* descriptor = dex_file.GetMethodDeclaringClassDescriptor(method_id);
+
+    mirror::Class* cls = FindClassAndClearException(class_linker, self, descriptor, class_loader);
+    if (cls == nullptr) {
+      LOG(INFO) << "VerifierDeps: Could not resolve class " << descriptor;
+      return false;
+    }
+    DCHECK(cls->IsResolved());
+    ArtMethod* method = nullptr;
+    if (kind == kDirectMethodResolution) {
+      method = cls->FindDirectMethod(name, signature, pointer_size);
+    } else if (kind == kVirtualMethodResolution) {
+      method = cls->FindVirtualMethod(name, signature, pointer_size);
+    } else {
+      DCHECK_EQ(kind, kInterfaceMethodResolution);
+      method = cls->FindInterfaceMethod(name, signature, pointer_size);
+    }
+
+    if (entry.IsResolved()) {
+      std::string temp;
+      std::string expected_decl_klass = GetStringFromId(dex_file, entry.GetDeclaringClassIndex());
+      if (method == nullptr) {
+        LOG(INFO) << "VerifierDeps: Could not resolve "
+                  << kind
+                  << " method "
+                  << GetMethodDescription(dex_file, entry.GetDexMethodIndex());
+        return false;
+      } else if (expected_decl_klass != method->GetDeclaringClass()->GetDescriptor(&temp)) {
+        LOG(INFO) << "VerifierDeps: Unexpected declaring class for "
+                  << kind
+                  << " method resolution "
+                  << GetMethodDescription(dex_file, entry.GetDexMethodIndex())
+                  << " (expected="
+                  << expected_decl_klass
+                  << ", actual="
+                  << method->GetDeclaringClass()->GetDescriptor(&temp)
+                  << ")";
+        return false;
+      } else if (entry.GetAccessFlags() != GetAccessFlags(method)) {
+        LOG(INFO) << "VerifierDeps: Unexpected access flags for resolved "
+                  << kind
+                  << " method resolution "
+                  << GetMethodDescription(dex_file, entry.GetDexMethodIndex())
+                  << std::hex
+                  << " (expected="
+                  << entry.GetAccessFlags()
+                  << ", actual="
+                  << GetAccessFlags(method) << ")"
+                  << std::dec;
+        return false;
+      }
+    } else if (method != nullptr) {
+      LOG(INFO) << "VerifierDeps: Unexpected successful resolution of "
+                << kind
+                << " method "
+                << GetMethodDescription(dex_file, entry.GetDexMethodIndex());
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifierDeps::VerifyDexFile(Handle<mirror::ClassLoader> class_loader,
+                                 const DexFile& dex_file,
+                                 const DexFileDeps& deps,
+                                 Thread* self) const {
+  bool result = VerifyAssignability(
+      class_loader, dex_file, deps.assignable_types_, /* expected_assignability */ true, self);
+  result = result && VerifyAssignability(
+      class_loader, dex_file, deps.unassignable_types_, /* expected_assignability */ false, self);
+
+  result = result && VerifyClasses(class_loader, dex_file, deps.classes_, self);
+  result = result && VerifyFields(class_loader, dex_file, deps.fields_, self);
+
+  result = result && VerifyMethods(
+      class_loader, dex_file, deps.direct_methods_, kDirectMethodResolution, self);
+  result = result && VerifyMethods(
+      class_loader, dex_file, deps.virtual_methods_, kVirtualMethodResolution, self);
+  result = result && VerifyMethods(
+      class_loader, dex_file, deps.interface_methods_, kInterfaceMethodResolution, self);
+
+  return result;
+}
+
 }  // namespace verifier
 }  // namespace art
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index 9d2622d..4b8206f 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -25,6 +25,7 @@
 #include "art_method.h"
 #include "base/array_ref.h"
 #include "base/mutex.h"
+#include "indenter.h"
 #include "method_resolution_kind.h"
 #include "method_verifier.h"  // For MethodVerifier::FailureKind.
 #include "obj_ptr.h"
@@ -41,25 +42,30 @@
 // which are being compiled. Classes defined in DEX files outside of this set
 // (or synthesized classes without associated DEX files) are considered being
 // in the classpath.
-// During code-flow verification, the MethodVerifier informs the VerifierDeps
-// singleton about the outcome of every resolution and assignability test, and
-// the singleton records them if their outcome may change with changes in the
-// classpath.
+// During code-flow verification, the MethodVerifier informs VerifierDeps
+// about the outcome of every resolution and assignability test, and
+// the VerifierDeps object records them if their outcome may change with
+// changes in the classpath.
 class VerifierDeps {
  public:
-  explicit VerifierDeps(const std::vector<const DexFile*>& dex_files)
-      REQUIRES(!Locks::verifier_deps_lock_);
+  explicit VerifierDeps(const std::vector<const DexFile*>& dex_files);
+
+  VerifierDeps(const std::vector<const DexFile*>& dex_files, ArrayRef<const uint8_t> data);
+
+  // Merge `other` into this `VerifierDeps`. `other` and `this` must be for the
+  // same set of dex files.
+  void MergeWith(const VerifierDeps& other, const std::vector<const DexFile*>& dex_files);
 
   // Record the verification status of the class at `type_idx`.
   static void MaybeRecordVerificationStatus(const DexFile& dex_file,
-                                            uint16_t type_idx,
+                                            dex::TypeIndex type_idx,
                                             MethodVerifier::FailureKind failure_kind)
       REQUIRES(!Locks::verifier_deps_lock_);
 
   // Record the outcome `klass` of resolving type `type_idx` from `dex_file`.
   // If `klass` is null, the class is assumed unresolved.
   static void MaybeRecordClassResolution(const DexFile& dex_file,
-                                         uint16_t type_idx,
+                                         dex::TypeIndex type_idx,
                                          mirror::Class* klass)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::verifier_deps_lock_);
@@ -94,63 +100,72 @@
       REQUIRES(!Locks::verifier_deps_lock_);
 
   // Serialize the recorded dependencies and store the data into `buffer`.
-  void Encode(std::vector<uint8_t>* buffer) const
-      REQUIRES(!Locks::verifier_deps_lock_);
+  // `dex_files` provides the order of the dex files in which the dependencies
+  // should be emitted.
+  void Encode(const std::vector<const DexFile*>& dex_files, std::vector<uint8_t>* buffer) const;
+
+  void Dump(VariableIndentationOutputStream* vios) const;
+
+  // Verify the encoded dependencies of this `VerifierDeps` are still valid.
+  bool ValidateDependencies(Handle<mirror::ClassLoader> class_loader, Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  const std::vector<dex::TypeIndex>& GetUnverifiedClasses(const DexFile& dex_file) const {
+    return GetDexFileDeps(dex_file)->unverified_classes_;
+  }
 
  private:
   static constexpr uint16_t kUnresolvedMarker = static_cast<uint16_t>(-1);
 
-  // Only used in tests to reconstruct the data structure from serialized data.
-  VerifierDeps(const std::vector<const DexFile*>& dex_files, ArrayRef<uint8_t> data)
-      REQUIRES(!Locks::verifier_deps_lock_);
-
-  using ClassResolutionBase = std::tuple<uint32_t, uint16_t>;
+  using ClassResolutionBase = std::tuple<dex::TypeIndex, uint16_t>;
   struct ClassResolution : public ClassResolutionBase {
     ClassResolution() = default;
     ClassResolution(const ClassResolution&) = default;
-    ClassResolution(uint32_t type_idx, uint16_t access_flags)
+    ClassResolution(dex::TypeIndex type_idx, uint16_t access_flags)
         : ClassResolutionBase(type_idx, access_flags) {}
 
     bool IsResolved() const { return GetAccessFlags() != kUnresolvedMarker; }
-    uint32_t GetDexTypeIndex() const { return std::get<0>(*this); }
+    dex::TypeIndex GetDexTypeIndex() const { return std::get<0>(*this); }
     uint16_t GetAccessFlags() const { return std::get<1>(*this); }
   };
 
-  using FieldResolutionBase = std::tuple<uint32_t, uint16_t, uint32_t>;
+  using FieldResolutionBase = std::tuple<uint32_t, uint16_t, dex::StringIndex>;
   struct FieldResolution : public FieldResolutionBase {
     FieldResolution() = default;
     FieldResolution(const FieldResolution&) = default;
-    FieldResolution(uint32_t field_idx, uint16_t access_flags, uint32_t declaring_class_idx)
+    FieldResolution(uint32_t field_idx, uint16_t access_flags, dex::StringIndex declaring_class_idx)
         : FieldResolutionBase(field_idx, access_flags, declaring_class_idx) {}
 
     bool IsResolved() const { return GetAccessFlags() != kUnresolvedMarker; }
     uint32_t GetDexFieldIndex() const { return std::get<0>(*this); }
     uint16_t GetAccessFlags() const { return std::get<1>(*this); }
-    uint32_t GetDeclaringClassIndex() const { return std::get<2>(*this); }
+    dex::StringIndex GetDeclaringClassIndex() const { return std::get<2>(*this); }
   };
 
-  using MethodResolutionBase = std::tuple<uint32_t, uint16_t, uint32_t>;
+  using MethodResolutionBase = std::tuple<uint32_t, uint16_t, dex::StringIndex>;
   struct MethodResolution : public MethodResolutionBase {
     MethodResolution() = default;
     MethodResolution(const MethodResolution&) = default;
-    MethodResolution(uint32_t method_idx, uint16_t access_flags, uint32_t declaring_class_idx)
+    MethodResolution(uint32_t method_idx,
+                     uint16_t access_flags,
+                     dex::StringIndex declaring_class_idx)
         : MethodResolutionBase(method_idx, access_flags, declaring_class_idx) {}
 
     bool IsResolved() const { return GetAccessFlags() != kUnresolvedMarker; }
     uint32_t GetDexMethodIndex() const { return std::get<0>(*this); }
     uint16_t GetAccessFlags() const { return std::get<1>(*this); }
-    uint32_t GetDeclaringClassIndex() const { return std::get<2>(*this); }
+    dex::StringIndex GetDeclaringClassIndex() const { return std::get<2>(*this); }
   };
 
-  using TypeAssignabilityBase = std::tuple<uint32_t, uint32_t>;
+  using TypeAssignabilityBase = std::tuple<dex::StringIndex, dex::StringIndex>;
   struct TypeAssignability : public TypeAssignabilityBase {
     TypeAssignability() = default;
     TypeAssignability(const TypeAssignability&) = default;
-    TypeAssignability(uint32_t destination_idx, uint32_t source_idx)
+    TypeAssignability(dex::StringIndex destination_idx, dex::StringIndex source_idx)
         : TypeAssignabilityBase(destination_idx, source_idx) {}
 
-    uint32_t GetDestination() const { return std::get<0>(*this); }
-    uint32_t GetSource() const { return std::get<1>(*this); }
+    dex::StringIndex GetDestination() const { return std::get<0>(*this); }
+    dex::StringIndex GetSource() const { return std::get<1>(*this); }
   };
 
   // Data structure representing dependencies collected during verification of
@@ -173,49 +188,56 @@
     std::set<MethodResolution> interface_methods_;
 
     // List of classes that were not fully verified in that dex file.
-    std::vector<uint16_t> unverified_classes_;
+    std::vector<dex::TypeIndex> unverified_classes_;
 
     bool Equals(const DexFileDeps& rhs) const;
   };
 
   // Finds the DexFileDep instance associated with `dex_file`, or nullptr if
   // `dex_file` is not reported as being compiled.
-  // We disable thread safety analysis. The method only reads the key set of
-  // `dex_deps_` which stays constant after initialization.
-  DexFileDeps* GetDexFileDeps(const DexFile& dex_file)
-      NO_THREAD_SAFETY_ANALYSIS;
+  DexFileDeps* GetDexFileDeps(const DexFile& dex_file);
+
+  const DexFileDeps* GetDexFileDeps(const DexFile& dex_file) const;
 
   // Returns true if `klass` is null or not defined in any of dex files which
   // were reported as being compiled.
-  bool IsInClassPath(ObjPtr<mirror::Class> klass)
+  bool IsInClassPath(ObjPtr<mirror::Class> klass) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the index of `str`. If it is defined in `dex_file_`, this is the dex
   // string ID. If not, an ID is assigned to the string and cached in `strings_`
   // of the corresponding DexFileDeps structure (either provided or inferred from
   // `dex_file`).
-  uint32_t GetIdFromString(const DexFile& dex_file, const std::string& str)
-      REQUIRES(Locks::verifier_deps_lock_);
+  dex::StringIndex GetIdFromString(const DexFile& dex_file, const std::string& str)
+      REQUIRES(!Locks::verifier_deps_lock_);
 
   // Returns the string represented by `id`.
-  std::string GetStringFromId(const DexFile& dex_file, uint32_t string_id)
-      REQUIRES(Locks::verifier_deps_lock_);
+  std::string GetStringFromId(const DexFile& dex_file, dex::StringIndex string_id) const;
 
   // Returns the bytecode access flags of `element` (bottom 16 bits), or
   // `kUnresolvedMarker` if `element` is null.
   template <typename T>
-  uint16_t GetAccessFlags(T* element)
+  static uint16_t GetAccessFlags(T* element)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns a string ID of the descriptor of the declaring class of `element`,
   // or `kUnresolvedMarker` if `element` is null.
-  template <typename T>
-  uint32_t GetDeclaringClassStringId(const DexFile& dex_file, T* element)
+  dex::StringIndex GetMethodDeclaringClassStringId(const DexFile& dex_file,
+                                                   uint32_t dex_method_idx,
+                                                   ArtMethod* method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  dex::StringIndex GetFieldDeclaringClassStringId(const DexFile& dex_file,
+                                                  uint32_t dex_field_idx,
+                                                  ArtField* field)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns a string ID of the descriptor of the class.
+  dex::StringIndex GetClassDescriptorStringId(const DexFile& dex_file, ObjPtr<mirror::Class> klass)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(Locks::verifier_deps_lock_);
+      REQUIRES(!Locks::verifier_deps_lock_);
 
   void AddClassResolution(const DexFile& dex_file,
-                          uint16_t type_idx,
+                          dex::TypeIndex type_idx,
                           mirror::Class* klass)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::verifier_deps_lock_);
@@ -238,19 +260,63 @@
                         mirror::Class* source,
                         bool is_strict,
                         bool is_assignable)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool Equals(const VerifierDeps& rhs) const;
+
+  // Verify `dex_file` according to the `deps`, i.e. go over each
+  // `DexFileDeps` field and check that the recorded information still
+  // holds.
+  bool VerifyDexFile(Handle<mirror::ClassLoader> class_loader,
+                     const DexFile& dex_file,
+                     const DexFileDeps& deps,
+                     Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool VerifyAssignability(Handle<mirror::ClassLoader> class_loader,
+                           const DexFile& dex_file,
+                           const std::set<TypeAssignability>& assignables,
+                           bool expected_assignability,
+                           Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Verify that the set of resolved classes at the point of creation
+  // of this `VerifierDeps` is still the same.
+  bool VerifyClasses(Handle<mirror::ClassLoader> class_loader,
+                     const DexFile& dex_file,
+                     const std::set<ClassResolution>& classes,
+                     Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Verify that the set of resolved fields at the point of creation
+  // of this `VerifierDeps` is still the same, and each field resolves to the
+  // same field holder and access flags.
+  bool VerifyFields(Handle<mirror::ClassLoader> class_loader,
+                    const DexFile& dex_file,
+                    const std::set<FieldResolution>& fields,
+                    Thread* self) const
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::verifier_deps_lock_);
 
-  bool Equals(const VerifierDeps& rhs) const
-      REQUIRES(!Locks::verifier_deps_lock_);
+  // Verify that the set of resolved methods at the point of creation
+  // of this `VerifierDeps` is still the same, and each method resolves to the
+  // same method holder, access flags, and invocation kind.
+  bool VerifyMethods(Handle<mirror::ClassLoader> class_loader,
+                     const DexFile& dex_file,
+                     const std::set<MethodResolution>& methods,
+                     MethodResolutionKind kind,
+                     Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Map from DexFiles into dependencies collected from verification of their methods.
-  std::map<const DexFile*, std::unique_ptr<DexFileDeps>> dex_deps_
-      GUARDED_BY(Locks::verifier_deps_lock_);
+  std::map<const DexFile*, std::unique_ptr<DexFileDeps>> dex_deps_;
 
   friend class VerifierDepsTest;
   ART_FRIEND_TEST(VerifierDepsTest, StringToId);
   ART_FRIEND_TEST(VerifierDepsTest, EncodeDecode);
+  ART_FRIEND_TEST(VerifierDepsTest, EncodeDecodeMulti);
+  ART_FRIEND_TEST(VerifierDepsTest, VerifyDeps);
+  ART_FRIEND_TEST(VerifierDepsTest, CompilerDriver);
 };
 
 }  // namespace verifier
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 153c7ef..3549586 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/throwable.h"
 #include "obj_ptr-inl.h"
@@ -34,6 +35,8 @@
 jclass WellKnownClasses::com_android_dex_Dex;
 jclass WellKnownClasses::dalvik_annotation_optimization_CriticalNative;
 jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
+jclass WellKnownClasses::dalvik_system_BaseDexClassLoader;
+jclass WellKnownClasses::dalvik_system_DexClassLoader;
 jclass WellKnownClasses::dalvik_system_DexFile;
 jclass WellKnownClasses::dalvik_system_DexPathList;
 jclass WellKnownClasses::dalvik_system_DexPathList__Element;
@@ -108,7 +111,7 @@
 
 jfieldID WellKnownClasses::dalvik_system_DexFile_cookie;
 jfieldID WellKnownClasses::dalvik_system_DexFile_fileName;
-jfieldID WellKnownClasses::dalvik_system_PathClassLoader_pathList;
+jfieldID WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList;
 jfieldID WellKnownClasses::dalvik_system_DexPathList_dexElements;
 jfieldID WellKnownClasses::dalvik_system_DexPathList__Element_dexFile;
 jfieldID WellKnownClasses::java_lang_Thread_daemon;
@@ -217,9 +220,9 @@
   ScopedObjectAccess soa(Thread::Current());
   #define LOAD_STRING_INIT(init_runtime_name, init_signature, new_runtime_name,             \
                            new_java_name, new_signature, ...)                               \
-      init_runtime_name = soa.DecodeMethod(                                                 \
+      init_runtime_name = jni::DecodeArtMethod(                                             \
           CacheMethod(env, java_lang_String, false, "<init>", init_signature));             \
-      new_runtime_name = soa.DecodeMethod(                                                  \
+      new_runtime_name = jni::DecodeArtMethod(                                              \
           CacheMethod(env, java_lang_StringFactory, true, new_java_name, new_signature));
       STRING_INIT_LIST(LOAD_STRING_INIT)
   #undef LOAD_STRING_INIT
@@ -237,8 +240,8 @@
 ArtMethod* WellKnownClasses::StringInitToStringFactory(ArtMethod* string_init) {
   #define TO_STRING_FACTORY(init_runtime_name, init_signature, new_runtime_name,            \
                             new_java_name, new_signature, entry_point_name)                 \
-      if (string_init == init_runtime_name) {                                               \
-        return new_runtime_name;                                                            \
+      if (string_init == (init_runtime_name)) {                                             \
+        return (new_runtime_name);                                                          \
       }
       STRING_INIT_LIST(TO_STRING_FACTORY)
   #undef TO_STRING_FACTORY
@@ -249,7 +252,7 @@
 uint32_t WellKnownClasses::StringInitToEntryPoint(ArtMethod* string_init) {
   #define TO_ENTRY_POINT(init_runtime_name, init_signature, new_runtime_name,               \
                          new_java_name, new_signature, entry_point_name)                    \
-      if (string_init == init_runtime_name) {                                               \
+      if (string_init == (init_runtime_name)) {                                             \
         return kQuick ## entry_point_name;                                                  \
       }
       STRING_INIT_LIST(TO_ENTRY_POINT)
@@ -264,6 +267,8 @@
   dalvik_annotation_optimization_CriticalNative =
       CacheClass(env, "dalvik/annotation/optimization/CriticalNative");
   dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
+  dalvik_system_BaseDexClassLoader = CacheClass(env, "dalvik/system/BaseDexClassLoader");
+  dalvik_system_DexClassLoader = CacheClass(env, "dalvik/system/DexClassLoader");
   dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile");
   dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList");
   dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element");
@@ -332,9 +337,9 @@
   org_apache_harmony_dalvik_ddmc_DdmServer_broadcast = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "broadcast", "(I)V");
   org_apache_harmony_dalvik_ddmc_DdmServer_dispatch = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "dispatch", "(I[BII)Lorg/apache/harmony/dalvik/ddmc/Chunk;");
 
+  dalvik_system_BaseDexClassLoader_pathList = CacheField(env, dalvik_system_BaseDexClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "Ljava/lang/Object;");
   dalvik_system_DexFile_fileName = CacheField(env, dalvik_system_DexFile, false, "mFileName", "Ljava/lang/String;");
-  dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
   dalvik_system_DexPathList__Element_dexFile = CacheField(env, dalvik_system_DexPathList__Element, false, "dexFile", "Ldalvik/system/DexFile;");
   java_lang_Thread_daemon = CacheField(env, java_lang_Thread, false, "daemon", "Z");
@@ -389,7 +394,9 @@
 }
 
 ObjPtr<mirror::Class> WellKnownClasses::ToClass(jclass global_jclass) {
-  return ObjPtr<mirror::Class>::DownCast(Thread::Current()->DecodeJObject(global_jclass));
+  auto ret = ObjPtr<mirror::Class>::DownCast(Thread::Current()->DecodeJObject(global_jclass));
+  DCHECK(!ret.IsNull());
+  return ret;
 }
 
 }  // namespace art
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 2fb5bb4..227996a 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -47,6 +47,8 @@
   static jclass com_android_dex_Dex;
   static jclass dalvik_annotation_optimization_CriticalNative;
   static jclass dalvik_annotation_optimization_FastNative;
+  static jclass dalvik_system_BaseDexClassLoader;
+  static jclass dalvik_system_DexClassLoader;
   static jclass dalvik_system_DexFile;
   static jclass dalvik_system_DexPathList;
   static jclass dalvik_system_DexPathList__Element;
@@ -119,11 +121,11 @@
   static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_broadcast;
   static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_dispatch;
 
+  static jfieldID dalvik_system_BaseDexClassLoader_pathList;
   static jfieldID dalvik_system_DexFile_cookie;
   static jfieldID dalvik_system_DexFile_fileName;
   static jfieldID dalvik_system_DexPathList_dexElements;
   static jfieldID dalvik_system_DexPathList__Element_dexFile;
-  static jfieldID dalvik_system_PathClassLoader_pathList;
   static jfieldID java_lang_reflect_Executable_artMethod;
   static jfieldID java_lang_reflect_Proxy_h;
   static jfieldID java_lang_Thread_daemon;
diff --git a/test/015-switch/src/Main.java b/test/015-switch/src/Main.java
index 2a7995a..2b724a1 100644
--- a/test/015-switch/src/Main.java
+++ b/test/015-switch/src/Main.java
@@ -113,7 +113,7 @@
     }
 
     // Long packed-switch that might lead to not creating chained-ifs.
-    public static void packedSwitch7(int value) {
+    public static long packedSwitch7(int value) {
         switch (value) {
             case 1:
                 System.out.println(1); break;
@@ -148,6 +148,113 @@
             default:
                 System.out.println("default"); break;
         }
+
+        // Jump tables were previously emitted at the end of the method code buffer. The
+        // following boilerplate code aims to fill the emitted code buffer extensively
+        // and check that even for a big method the jump table is correctly emitted and its
+        // address is within the range of the corresponding pc-relative instructions (this
+        // applies mainly to ARM).
+        long temp = value;
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+        temp = Long.rotateLeft(temp, value);
+
+        return temp;
     }
 
     // Sparse switch, just leave a gap.
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index a848fba..51351e1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -431,6 +431,22 @@
                 "\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
                 "\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
                 "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0000",
+                "\u0000\u0000",
+                "\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
+                "\u0000z\u0000",
+                "\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000z\u0000",
         };
         String[] suffixes = {
                 "",
@@ -458,30 +474,40 @@
                     String full = p + c + s;
                     int expX = (c.isEmpty() || c.charAt(0) != 'x') ? -1 : p.length();
                     int exp0440 = (c.isEmpty() || c.charAt(0) != '\u0440') ? -1 : p.length();
+                    int exp0000 = (c.isEmpty() || c.charAt(0) != '\u0000') ? -1 : p.length();
                     Assert.assertEquals(expX, $noinline$indexOf(full, 'x'));
                     Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440'));
+                    Assert.assertEquals(exp0000, $noinline$indexOf(full, '\u0000'));
                     Assert.assertEquals(expX, $noinline$indexOf(full, 'x', -1));
                     Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440', -1));
+                    Assert.assertEquals(exp0000, $noinline$indexOf(full, '\u0000', -1));
                     Assert.assertEquals(-1, $noinline$indexOf(full, 'x', full.length() + 1));
                     Assert.assertEquals(-1, $noinline$indexOf(full, '\u0440', full.length() + 1));
+                    Assert.assertEquals(-1, $noinline$indexOf(full, '\u0000', full.length() + 1));
                     for (int from = 0; from != full.length(); ++from) {
                         final int eX;
                         final int e0440;
+                        final int e0000;
                         if (from <= p.length()) {
                             eX = expX;
                             e0440 = exp0440;
+                            e0000 = exp0000;
                         } else if (from >= p.length() + c.length()) {
                             eX = -1;
                             e0440 = -1;
+                            e0000 = -1;
                         } else if (full.charAt(from) == 'z') {
                             eX = (full.charAt(from + 1) != 'x') ? -1 : from + 1;
                             e0440 = (full.charAt(from + 1) != '\u0440') ? -1 : from + 1;
+                            e0000 = (full.charAt(from + 1) != '\u0000') ? -1 : from + 1;
                         } else {
                             eX = (full.charAt(from) != 'x') ? -1 : from;
                             e0440 = (full.charAt(from) != '\u0440') ? -1 : from;
+                            e0000 = (full.charAt(from) != '\u0000') ? -1 : from;
                         }
                         Assert.assertEquals(eX, $noinline$indexOf(full, 'x', from));
                         Assert.assertEquals(e0440, $noinline$indexOf(full, '\u0440', from));
+                        Assert.assertEquals(e0000, $noinline$indexOf(full, '\u0000', from));
                     }
                 }
             }
diff --git a/test/030-bad-finalizer/expected.txt b/test/030-bad-finalizer/expected.txt
index ee9cfff..74e208c 100644
--- a/test/030-bad-finalizer/expected.txt
+++ b/test/030-bad-finalizer/expected.txt
@@ -1,4 +1,4 @@
-About to null reference and request GC.
+About to null reference.
 Finalizer started and spinning...
 Finalizer done spinning.
 Finalizer sleeping forever now.
diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java
index 942ee25..0e69a96 100644
--- a/test/030-bad-finalizer/src/Main.java
+++ b/test/030-bad-finalizer/src/Main.java
@@ -14,26 +14,60 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CountDownLatch;
+import static java.util.concurrent.TimeUnit.MINUTES;
+
 /**
  * Test a class with a bad finalizer.
+ *
+ * This test is inherently flaky. It assumes that the system will schedule the finalizer daemon
+ * and finalizer watchdog daemon enough to reach the timeout and throw the fatal exception.
  */
 public class Main {
-    public static void main(String[] args) {
-        BadFinalizer bf = new BadFinalizer();
+    public static void main(String[] args) throws Exception {
+        CountDownLatch finalizerWait = new CountDownLatch(1);
 
-        System.out.println("About to null reference and request GC.");
-        bf = null;
-        Runtime.getRuntime().gc();
+        // A separate method to ensure no dex register keeps the object alive.
+        createBadFinalizer(finalizerWait);
 
-        for (int i = 0; i < 8; i++) {
-            snooze(4000);
+        // Should have at least two iterations to trigger finalization, but just to make sure run
+        // some more.
+        for (int i = 0; i < 5; i++) {
             Runtime.getRuntime().gc();
         }
 
+        // Now wait for the finalizer to start running. Give it a minute.
+        finalizerWait.await(1, MINUTES);
+
+        // Now fall asleep with a timeout. The timeout is large enough that we expect the
+        // finalizer daemon to have killed the process before the deadline elapses.
+        // Note: the timeout is here (instead of an infinite sleep) to protect the test
+        //       environment (e.g., in case this is run without a timeout wrapper).
+        final long timeout = 60 * 1000;  // 1 minute.
+        long remainingWait = timeout;
+        final long waitStart = System.currentTimeMillis();
+        while (remainingWait > 0) {
+            synchronized (args) {  // Just use an already existing object for simplicity...
+                try {
+                    args.wait(remainingWait);
+                } catch (Exception e) {
+                }
+            }
+            remainingWait = timeout - (System.currentTimeMillis() - waitStart);
+        }
+
+        // We should not get here.
         System.out.println("UNREACHABLE");
         System.exit(0);
     }
 
+    private static void createBadFinalizer(CountDownLatch finalizerWait) {
+        BadFinalizer bf = new BadFinalizer(finalizerWait);
+
+        System.out.println("About to null reference.");
+        bf = null;  // Not that this would make a difference, could be eliminated earlier.
+    }
+
     public static void snooze(int ms) {
         try {
             Thread.sleep(ms);
@@ -45,9 +79,17 @@
      * Class with a bad finalizer.
      */
     public static class BadFinalizer {
+        private CountDownLatch finalizerWait;
+        private volatile int j = 0;  // Volatile in an effort to curb loop optimization.
+
+        public BadFinalizer(CountDownLatch finalizerWait) {
+            this.finalizerWait = finalizerWait;
+        }
+
         protected void finalize() {
+            finalizerWait.countDown();
+
             System.out.println("Finalizer started and spinning...");
-            int j = 0;
 
             /* spin for a bit */
             long start, end;
diff --git a/test/039-join-main/src/Main.java b/test/039-join-main/src/Main.java
index 2373221..60791e4 100644
--- a/test/039-join-main/src/Main.java
+++ b/test/039-join-main/src/Main.java
@@ -14,35 +14,48 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CountDownLatch;
+
 /**
  * Make sure that a sub-thread can join the main thread.
  */
 public class Main {
-    public static void main(String[] args) {
+    public static void main(String[] args) throws Exception {
         Thread t;
+        CountDownLatch waitLatch = new CountDownLatch(1);
+        CountDownLatch progressLatch = new CountDownLatch(1);
 
-        t = new Thread(new JoinMainSub(Thread.currentThread()), "Joiner");
+        t = new Thread(new JoinMainSub(Thread.currentThread(), waitLatch, progressLatch), "Joiner");
         System.out.print("Starting thread '" + t.getName() + "'\n");
         t.start();
 
-        try { Thread.sleep(1000); }
-        catch (InterruptedException ie) {}
-
+        waitLatch.await();
         System.out.print("JoinMain starter returning\n");
+        progressLatch.countDown();
+
+        // Keep the thread alive a little longer, giving the other thread a chance to join on a
+        // live thread (though that isn't critically important for the test).
+        Thread.sleep(500);
     }
 }
 
 class JoinMainSub implements Runnable {
     private Thread mJoinMe;
+    private CountDownLatch waitLatch;
+    private CountDownLatch progressLatch;
 
-    public JoinMainSub(Thread joinMe) {
+    public JoinMainSub(Thread joinMe, CountDownLatch waitLatch, CountDownLatch progressLatch) {
         mJoinMe = joinMe;
+        this.waitLatch = waitLatch;
+        this.progressLatch = progressLatch;
     }
 
     public void run() {
         System.out.print("@ JoinMainSub running\n");
 
         try {
+            waitLatch.countDown();
+            progressLatch.await();
             mJoinMe.join();
             System.out.print("@ JoinMainSub successfully joined main\n");
         } catch (InterruptedException ie) {
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 56d737f..41329af 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -466,7 +466,7 @@
   return 0;
 }
 
-extern "C" char* native_bridge_getError() {
+extern "C" const char* native_bridge_getError() {
   printf("dlerror() in native bridge.\n");
   return nullptr;
 }
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 2b77b29..0a03ecb 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -21,3 +21,4 @@
 class null false test
 JNI_OnUnload called
 Number of loaded unload-ex maps 0
+Too small false
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index f9b6180..2a6e944 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -47,6 +47,8 @@
             stressTest(constructor);
             // Test that the oat files are unloaded.
             testOatFilesUnloaded(getPid());
+            // Test that objects keep class loader live for sticky GC.
+            testStickyUnload(constructor);
         } catch (Exception e) {
             e.printStackTrace();
         }
@@ -161,6 +163,30 @@
         return intHolder;
     }
 
+    private static Object allocObjectInOtherClassLoader(Constructor<?> constructor)
+            throws Exception {
+      ClassLoader loader = (ClassLoader) constructor.newInstance(
+              DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+      return loader.loadClass("IntHolder").newInstance();
+    }
+
+    // Regression test for public issue 227182.
+    private static void testStickyUnload(Constructor<?> constructor) throws Exception {
+        String s = "";
+        for (int i = 0; i < 10; ++i) {
+            s = "";
+            // The object is the only thing preventing the class loader from being unloaded.
+            Object o = allocObjectInOtherClassLoader(constructor);
+            for (int j = 0; j < 1000; ++j) {
+                s += j + " ";
+            }
+            // Make sure the object still has a valid class (hasn't been incorrectly unloaded).
+            s += o.getClass().getName();
+            o = null;
+        }
+        System.out.println("Too small " + (s.length() < 1000));
+    }
+
     private static WeakReference<Class> setUpUnloadClassWeak(Constructor<?> constructor)
             throws Exception {
         return new WeakReference<Class>(setUpUnloadClass(constructor));
diff --git a/test/445-checker-licm/expected.txt b/test/445-checker-licm/expected.txt
index e69de29..b0aad4d 100644
--- a/test/445-checker-licm/expected.txt
+++ b/test/445-checker-licm/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/445-checker-licm/src/Main.java b/test/445-checker-licm/src/Main.java
index 061fe6e..00ce3a9 100644
--- a/test/445-checker-licm/src/Main.java
+++ b/test/445-checker-licm/src/Main.java
@@ -164,8 +164,43 @@
     return result;
   }
 
+  //
+  // All operations up to the null check can be hoisted out of the
+  // loop. The null check itself sees the induction in its environment.
+  //
+  /// CHECK-START: int Main.doWhile(int) licm (before)
+  /// CHECK-DAG: <<Add:i\d+>> Add                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              LoadClass           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:l\d+>> StaticFieldGet      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              NullCheck [<<Get>>] env:[[<<Add>>,<<Get>>,{{i\d+}}]] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:              ArrayLength         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              BoundsCheck         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              ArrayGet            loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doWhile(int) licm (after)
+  /// CHECK-NOT: LoadClass      loop:{{B\d+}}
+  /// CHECK-NOT: StaticFieldGet loop:{{B\d+}}
+  //
+  /// CHECK-START: int Main.doWhile(int) licm (after)
+  /// CHECK-DAG:              LoadClass           loop:none
+  /// CHECK-DAG: <<Get:l\d+>> StaticFieldGet      loop:none
+  /// CHECK-DAG: <<Add:i\d+>> Add                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              NullCheck [<<Get>>] env:[[<<Add>>,<<Get>>,{{i\d+}}]] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:              ArrayLength         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              BoundsCheck         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              ArrayGet            loop:<<Loop>>      outer_loop:none
+  public static int doWhile(int k) {
+    int i = k;
+    do {
+      i += 2;  // Induction update; runs before the first array access.
+    } while (staticArray[i] == 0);  // May throw NPE or IOOBE; main exercises both.
+    return i;
+  }
+
   public static int staticField = 42;
 
+  public static int[] staticArray = null;
+
   public static void assertEquals(int expected, int actual) {
     if (expected != actual) {
       throw new Error("Expected " + expected + ", got " + actual);
@@ -181,5 +216,24 @@
     assertEquals(21, divAndIntrinsic(new int[] { 4, -2, 8, -3 }));
     assertEquals(45, invariantBoundIntrinsic(-10));
     assertEquals(30, invariantBodyIntrinsic(2, 3));
+
+    staticArray = null;
+    try {
+      doWhile(0);
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+    }
+    staticArray = new int[5];
+    staticArray[4] = 1;
+    assertEquals(4, doWhile(-2));
+    assertEquals(4, doWhile(0));
+    assertEquals(4, doWhile(2));
+    try {
+      doWhile(1);
+      throw new Error("Expected IOOBE");
+    } catch (IndexOutOfBoundsException e) {
+    }
+
+    System.out.println("passed");
   }
 }
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 0360eda..5fc5464 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -46,12 +46,12 @@
       CHECK_EQ(value, 42u);
 
       bool success = GetVReg(m, 1, kIntVReg, &value);
-      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+      if (!IsShadowFrame() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK(!success);
       }
 
       success = GetVReg(m, 2, kIntVReg, &value);
-      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+      if (!IsShadowFrame() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK(!success);
       }
 
@@ -83,12 +83,12 @@
       CHECK_EQ(value, 42u);
 
       bool success = GetVRegPair(m, 2, kLongLoVReg, kLongHiVReg, &value);
-      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+      if (!IsShadowFrame() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK(!success);
       }
 
       success = GetVRegPair(m, 4, kLongLoVReg, kLongHiVReg, &value);
-      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+      if (!IsShadowFrame() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK(!success);
       }
 
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index f62a77d..f867bdf 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -64,7 +64,7 @@
       CHECK_EQ(value, 1u);
 
       bool success = GetVReg(m, 2, kIntVReg, &value);
-      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+      if (!IsShadowFrame() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK(!success);
       }
 
diff --git a/test/562-no-intermediate/expected.txt b/test/478-checker-inline-noreturn/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/478-checker-inline-noreturn/expected.txt
diff --git a/test/478-checker-inline-noreturn/info.txt b/test/478-checker-inline-noreturn/info.txt
new file mode 100644
index 0000000..64f42ed
--- /dev/null
+++ b/test/478-checker-inline-noreturn/info.txt
@@ -0,0 +1,3 @@
+Tests inlining a function with a no-exit loop into a loop. LinearOrder
+computation fails because of incorrect HLoopInformation if we inline
+a loop without an exit.
diff --git a/test/478-checker-inline-noreturn/src/Main.java b/test/478-checker-inline-noreturn/src/Main.java
new file mode 100644
index 0000000..7aaeac0
--- /dev/null
+++ b/test/478-checker-inline-noreturn/src/Main.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ * A test that checks that the inliner does not inline functions that contain
+ * a loop with no exit.  This is because the incremental update to
+ * HLoopInformation done by the inliner does not work with the LinearOrder
+ * computation if the inlined function does not always return.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static int $opt$noinline$Function(int x, int y) {
+    int result;
+    if (x <= y) {
+      result = 42;
+    } else {
+      while (true);  // Intentional loop with no exit: this path never returns.
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.callerLoop(int, int) inliner (before)
+  /// CHECK:         InvokeStaticOrDirect method_name:Main.$opt$noinline$Function  loop:{{B\d+}}
+
+  /// CHECK-START: int Main.callerLoop(int, int) inliner (after)
+  /// CHECK:         InvokeStaticOrDirect method_name:Main.$opt$noinline$Function  loop:{{B\d+}}
+
+  public static int callerLoop(int max_x, int max_y) {
+    int total = 0;
+    for (int x = 0; x < max_x; ++x) {
+      total += $opt$noinline$Function(x, max_y);  // Must remain a call inside the loop.
+    }
+    return total;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(42, callerLoop(1, 1));  // One iteration: x = 0 <= max_y = 1, so 42.
+  }
+}
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index 141054d..0ca822f 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -30,7 +30,7 @@
     return false;
   }
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -39,13 +39,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Sub
   /// CHECK-NOT:                      Phi
@@ -62,7 +62,7 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -71,13 +71,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Phi
@@ -94,10 +94,10 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK:                          Mul
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static int testRemoveLoop(int x) {
@@ -109,11 +109,11 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      Return
   /// CHECK-DAG:                      Exit
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      Return
   /// CHECK-NOT:                      Exit
 
@@ -124,15 +124,15 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -143,16 +143,16 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -165,13 +165,13 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index e3617c7..cda6f73 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -23,7 +23,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
@@ -36,7 +36,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
@@ -73,7 +73,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -88,7 +88,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -129,7 +129,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -146,7 +146,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -194,7 +194,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -217,7 +217,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
index 7d5fd4f..95b1a93 100644
--- a/test/485-checker-dce-switch/src/Main.java
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -20,14 +20,14 @@
     return 5;
   }
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
   /// CHECK-DAG:                      Return [<<Const100>>]
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int wholeSwitchDead(int j) {
@@ -60,14 +60,14 @@
     return l;
   }
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:                      Return [<<Const7>>]
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_InRange() {
@@ -96,14 +96,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
   /// CHECK-DAG:                      Return [<<Const15>>]
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_AboveRange() {
@@ -132,14 +132,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
   /// CHECK-DAG:                      Return [<<ConstM5>>]
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_BelowRange() {
diff --git a/test/527-checker-array-access-split/info.txt b/test/527-checker-array-access-split/info.txt
index 9206804..a39bea3 100644
--- a/test/527-checker-array-access-split/info.txt
+++ b/test/527-checker-array-access-split/info.txt
@@ -1 +1 @@
-Test arm64-specific array access optimization.
+Test arm- and arm64-specific array access optimization.
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
index 6b5c657..209786a 100644
--- a/test/530-checker-loops3/src/Main.java
+++ b/test/530-checker-loops3/src/Main.java
@@ -246,7 +246,7 @@
 
     oneConstantIndex(a, b);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(2, a[i]);;
+      expectEquals(2, a[i]);
     }
     try {
       oneConstantIndex(a, b1);
@@ -256,7 +256,7 @@
 
     multipleConstantIndices(a, b);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(6, a[i]);;
+      expectEquals(6, a[i]);
     }
     try {
       multipleConstantIndices(a, b1);
@@ -266,7 +266,7 @@
 
     oneInvariantIndex(a, b, 1);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(2, a[i]);;
+      expectEquals(2, a[i]);
     }
     try {
       oneInvariantIndex(a, b1, 1);
@@ -276,7 +276,7 @@
 
     multipleInvariantIndices(a, b, 1);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(6, a[i]);;
+      expectEquals(6, a[i]);
     }
     try {
       multipleInvariantIndices(a, b1, 1);
@@ -286,18 +286,18 @@
 
     oneUnitStride(a, b);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(i + 1, a[i]);;
+      expectEquals(i + 1, a[i]);
     }
     try {
       oneUnitStride(a, b1);
       throw new Error("Should throw AIOOBE");
     } catch (ArrayIndexOutOfBoundsException e) {
-      expectEquals(100, a[0]);;
+      expectEquals(100, a[0]);
     }
 
     multipleUnitStrides(a, b);
     for (int i = 1; i < a.length - 1; i++) {
-      expectEquals(3 * i + 3, a[i]);;
+      expectEquals(3 * i + 3, a[i]);
     }
     try {
       multipleUnitStrides(a, b1);
@@ -308,7 +308,7 @@
     multipleUnitStridesConditional(a, b);
     for (int i = 2; i < a.length - 2; i++) {
       int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
-      expectEquals(e, a[i]);;
+      expectEquals(e, a[i]);
     }
     try {
       multipleUnitStridesConditional(a, b1);
diff --git a/test/530-checker-lse/expected.txt b/test/530-checker-lse/expected.txt
index e69de29..ddae16a 100644
--- a/test/530-checker-lse/expected.txt
+++ b/test/530-checker-lse/expected.txt
@@ -0,0 +1 @@
+java.lang.ArrayIndexOutOfBoundsException: length=3; index=3
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 6b0dedf..9f4be6c 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -18,6 +18,9 @@
   Circle(double radius) {
     this.radius = radius;
   }
+  public double getRadius() {  // Plain accessor for the radius field.
+    return radius;
+  }
   public double getArea() {
     return radius * radius * Math.PI;
   }
@@ -744,6 +747,44 @@
     return 1.0f;
   }
 
+  /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+
+  /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+
+  private static double getCircleArea(double radius, boolean b) {
+    double area = 0d;  // Result when b is false.
+    if (b) {
+      area = new Circle(radius).getArea();  // CHECK lines expect LSE to remove this allocation.
+    }
+    return area;
+  }
+
+  /// CHECK-START: double Main.testDeoptimize(int[], double[], double) load_store_elimination (before)
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+
+  /// CHECK-START: double Main.testDeoptimize(int[], double[], double) load_store_elimination (after)
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: NewInstance
+
+  private static double testDeoptimize(int[] iarr, double[] darr, double radius) {
+    iarr[0] = 1;  // One HDeoptimize here. Not triggered.
+    iarr[1] = 1;
+    Circle circle1 = new Circle(radius);
+    iarr[2] = 1;
+    darr[0] = circle1.getRadius();  // One HDeoptimize here, which holds circle1 live. Triggered.
+    darr[1] = circle1.getRadius();
+    darr[2] = circle1.getRadius();
+    darr[3] = circle1.getRadius();  // Out of bounds for the 3-element array that main passes.
+    return new Circle(Math.PI).getArea();  // CHECK lines expect LSE to remove this allocation.
+  }
+
   static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -808,6 +849,22 @@
     assertIntEquals(sumWithinRange(array, 1, 5), 11);
     assertFloatEquals(testAllocationEliminationWithLoops(), 1.0f);
     assertFloatEquals(mF, 0f);
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, getCircleArea(Math.PI, true));
+    assertDoubleEquals(0d, getCircleArea(Math.PI, false));
+
+    int[] iarray = {0, 0, 0};
+    double[] darray = {0d, 0d, 0d};
+    try {
+      assertDoubleEquals(Math.PI * Math.PI * Math.PI, testDeoptimize(iarray, darray, Math.PI));
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+    assertIntEquals(iarray[0], 1);
+    assertIntEquals(iarray[1], 1);
+    assertIntEquals(iarray[2], 1);
+    assertDoubleEquals(darray[0], Math.PI);
+    assertDoubleEquals(darray[1], Math.PI);
+    assertDoubleEquals(darray[2], Math.PI);
   }
 
   static boolean sFlag;
diff --git a/test/530-checker-lse2/expected.txt b/test/530-checker-lse2/expected.txt
new file mode 100644
index 0000000..e18fc7e
--- /dev/null
+++ b/test/530-checker-lse2/expected.txt
@@ -0,0 +1,8 @@
+Start....
+r  = 9.649776E8
+mZ = false
+mI = 0
+mJ = -576460752303423488
+mF = NaN
+mD = NaN
+Done....
diff --git a/test/530-checker-lse2/info.txt b/test/530-checker-lse2/info.txt
new file mode 100644
index 0000000..8dd3f50
--- /dev/null
+++ b/test/530-checker-lse2/info.txt
@@ -0,0 +1,2 @@
+Checker test for store/allocation elimination in the presence of
+HDeoptimize.
diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java
new file mode 100644
index 0000000..0fe3d87
--- /dev/null
+++ b/test/530-checker-lse2/src/Main.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+// Modified from a fuzz test.
+public class Main {
+
+  private interface X {
+    int x();
+  }
+
+  private class A {  // Inner (non-static) class: reads the enclosing Main's mI.
+    public int a() {
+      return (+ (Math.multiplyExact(mI, mI)));
+    }
+  }
+
+  private class B extends A implements X {
+    public int a() {
+      return super.a() + ((int) (Math.max(364746077.0f, ((float) mD))));
+    }
+    public int x() {
+      return (mI >> (mI++));  // Side effect: also increments the enclosing Main's mI.
+    }
+  }
+
+  private static class C implements X {
+    public static int s() {
+      return 671468641;
+    }
+    public int c() {
+      return -383762838;
+    }
+    public int x() {
+      return -138813312;
+    }
+  }
+
+  private A mA  = new B();
+  private B mB  = new B();
+  private X mBX = new B();
+  private C mC  = new C();
+  private X mCX = new C();
+
+  private boolean mZ = false;
+  private int     mI = 0;
+  private long    mJ = 0;
+  private float   mF = 0;
+  private double  mD = 0;
+
+  private boolean[] mArray = new boolean[576];
+
+  private Main() {
+    boolean a = false;
+    for (int i0 = 0; i0 < 576; i0++) {
+      mArray[i0] = a;
+      a = !a;  // Alternates false, true, false, ... starting with false at index 0.
+    }
+  }
+
+  /// CHECK-START: float Main.testMethod() load_store_elimination (before)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-NOT: NewInstance
+
+  /// CHECK-START: float Main.testMethod() load_store_elimination (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NewInstance
+
+  private float testMethod() {
+    {
+      int lI0 = (-1456058746 << mI);
+      mD = ((double)(int)(double) mD);
+      for (int i0 = 56 - 1; i0 >= 0; i0--) {
+        mArray[i0] &= (Boolean.logicalOr(((true ? ((boolean) new Boolean((mZ))) : mZ) || mArray[i0]), (mZ)));
+        mF *= (mF * mF);
+        if ((mZ ^ true)) {
+          mF *= ((float)(int)(float) 267827331.0f);
+          mZ ^= ((false & ((boolean) new Boolean(false))) | mZ);
+          for (int i1 = 576 - 1; i1 >= 0; i1--) {
+            mZ &= ((mArray[279]) | ((boolean) new Boolean(true)));
+            mD -= (--mD);
+            for (int i2 = 56 - 1; i2 >= 0; i2--) {
+              mF /= (mF - mF);
+              mI = (Math.min(((int) new Integer(mI)), (766538816 * (++mI))));
+              mF += (mZ ? (mB.a()) : ((! mZ) ? -752042357.0f : (++mF)));
+              mJ |= ((long) new Long((-2084191070L + (mJ | mJ))));
+              lI0 |= ((int) new Integer(((int) new Integer(mI))));
+              if (((boolean) new Boolean(false))) {
+                mZ &= (mZ);
+                mF *= (mF--);
+                mD = (Double.POSITIVE_INFINITY);
+                mF += ((float)(int)(float) (-2026938813.0f * 638401585.0f));
+                mJ = (--mJ);
+                for (int i3 = 56 - 1; i3 >= 0; i3--) {
+                  mI &= (- mI);
+                  mD = (--mD);
+                  mArray[426] = (mZ || false);
+                  mF -= (((this instanceof Main) ? mF : mF) + 976981405.0f);
+                  mZ &= ((mZ) & (this instanceof Main));
+                }
+                mZ ^= (Float.isFinite(-1975953895.0f));
+              } else {
+                mJ /= ((long) (Math.nextDown(-1519600008.0f)));
+                mJ <<= (Math.round(1237681786.0));
+              }
+            }
+            mArray[i0] &= (false || ((1256071300.0f != -353296391.0f) ? false : (mZ ^ mArray[i0])));
+            mF *= (+ ((float) mD));
+            for (int i2 = 0; i2 < 576; i2++) {
+              mD *= ((double) lI0);
+              lI0 = (lI0 & (Integer.MIN_VALUE));
+              mF -= (--mF);
+            }
+            if ((this instanceof Main)) {
+              mZ ^= ((boolean) new Boolean(true));
+            } else {
+              {
+                int lI1 = (mZ ? (--lI0) : 1099574344);
+                mJ >>= (Math.incrementExact(mJ));
+                mJ = (~ -2103354070L);
+              }
+            }
+          }
+        } else {
+          mJ *= (- ((long) new Long(479832084L)));
+          mJ %= (Long.MAX_VALUE);
+          mD /= (--mD);
+          if ((mI > ((mBX.x()) << mI))) {
+            {
+              long lJ0 = (mJ--);
+              mI >>>= (mBX.x());
+            }
+            mF = (+ 505094603.0f);
+            mD *= (((boolean) new Boolean((! false))) ? mD : 1808773781.0);
+            mI *= (Integer.MIN_VALUE);
+            for (int i1 = 576 - 1; i1 >= 0; i1--) {
+              if (((boolean) new Boolean(false))) {
+                mD += ((double)(float)(double) -1051436901.0);
+              } else {
+                mF -= ((float)(int)(float) (Float.min(mF, (mF--))));
+              }
+              for (int i2 = 0; i2 < 576; i2++) {
+                mJ -= ((long) new Long(-1968644857L));
+                mJ ^= (+ (mC.s()));
+              }
+            }
+          } else {
+            mF -= ((- mF) + -2145489966.0f);
+          }
+          mD -= (mD++);
+          mD = (949112777.0 * 1209996119.0);
+        }
+        mZ &= (Boolean.logicalAnd(true, ((mZ) & (((boolean) new Boolean(true)) && true))));
+      }
+    }
+    return ((float) 964977619L);  // Constant result; main prints it as "r".
+  }
+
+  public static void main(String[] args) {
+    System.out.println("Start....");
+    Main t = new Main();
+    float r = 1883600237.0f;  // Placeholder; still printed if testMethod throws.
+    try {
+      r = t.testMethod();
+    } catch (Exception e) {
+      // Arithmetic, null pointer, index out of bounds, etc.
+      System.out.println("An exception was caught.");
+    }
+    System.out.println("r  = " + r);
+    System.out.println("mZ = " + t.mZ);
+    System.out.println("mI = " + t.mI);
+    System.out.println("mJ = " + t.mJ);
+    System.out.println("mF = " + t.mF);
+    System.out.println("mD = " + t.mD);
+    System.out.println("Done....");
+  }
+}
+
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 02c609e..0329e63 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -30,7 +30,7 @@
 
   /// CHECK-START-ARM: int Main.and255(int) disassembly (after)
   /// CHECK-NOT:            movs {{r\d+}}, #255
-  /// CHECK:                and {{r\d+}}, {{r\d+}}, #255
+  /// CHECK:                and {{r\d+}}, {{r\d+}}, #0xff
 
   public static int and255(int arg) {
     return arg & 255;
@@ -46,7 +46,7 @@
 
   /// CHECK-START-ARM: int Main.andNot15(int) disassembly (after)
   /// CHECK-NOT:            mvn {{r\d+}}, #15
-  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #0xf
 
   public static int andNot15(int arg) {
     return arg & ~15;
@@ -54,7 +54,7 @@
 
   /// CHECK-START-ARM: int Main.or255(int) disassembly (after)
   /// CHECK-NOT:            movs {{r\d+}}, #255
-  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #255
+  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #0xff
 
   public static int or255(int arg) {
     return arg | 255;
@@ -70,7 +70,7 @@
 
   /// CHECK-START-ARM: int Main.orNot15(int) disassembly (after)
   /// CHECK-NOT:            mvn {{r\d+}}, #15
-  /// CHECK:                orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK:                orn {{r\d+}}, {{r\d+}}, #0xf
 
   public static int orNot15(int arg) {
     return arg | ~15;
@@ -78,7 +78,7 @@
 
   /// CHECK-START-ARM: int Main.xor255(int) disassembly (after)
   /// CHECK-NOT:            movs {{r\d+}}, #255
-  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #255
+  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #0xff
 
   public static int xor255(int arg) {
     return arg ^ 255;
@@ -104,8 +104,8 @@
   /// CHECK-NOT:            movs {{r\d+}}, #255
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
-  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #255
-  /// CHECK-DAG:            movs {{r\d+}}, #0
+  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #0xff
+  /// CHECK-DAG:            mov{{s?}} {{r\d+}}, #0
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
 
@@ -115,7 +115,7 @@
 
   /// CHECK-START-ARM: long Main.and511(long) disassembly (after)
   /// CHECK:                mov {{r\d+}}, #511
-  /// CHECK-NEXT:           movs {{r\d+}}, #0
+  /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
   /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
@@ -131,7 +131,7 @@
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
-  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #0xf
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
 
@@ -144,8 +144,8 @@
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
-  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #15
-  /// CHECK-DAG:            bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #0xf
+  /// CHECK-DAG:            bic {{r\d+}}, {{r\d+}}, #0xf
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
 
@@ -157,7 +157,7 @@
   /// CHECK-NOT:            movs {{r\d+}}, #255
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
-  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #255
+  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #0xff
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
 
@@ -167,7 +167,7 @@
 
   /// CHECK-START-ARM: long Main.or511(long) disassembly (after)
   /// CHECK:                mov {{r\d+}}, #511
-  /// CHECK-NEXT:           movs {{r\d+}}, #0
+  /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
   /// CHECK:                orr{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
@@ -183,7 +183,7 @@
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
-  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #0xf
   /// CHECK-DAG:            mvn {{r\d+}}, #0
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
@@ -197,8 +197,8 @@
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
-  /// CHECK-DAG:            orr {{r\d+}}, {{r\d+}}, #15
-  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            orr {{r\d+}}, {{r\d+}}, #0xf
+  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #0xf
   /// CHECK-NOT:            orr{{(\.w)?}}
   /// CHECK-NOT:            orn
 
@@ -209,7 +209,7 @@
   /// CHECK-START-ARM: long Main.xor255(long) disassembly (after)
   /// CHECK-NOT:            movs {{r\d+}}, #255
   /// CHECK-NOT:            eor{{(\.w)?}}
-  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #255
+  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #0xff
   /// CHECK-NOT:            eor{{(\.w)?}}
 
   public static long xor255(long arg) {
@@ -218,7 +218,7 @@
 
   /// CHECK-START-ARM: long Main.xor511(long) disassembly (after)
   /// CHECK:                mov {{r\d+}}, #511
-  /// CHECK-NEXT:           movs {{r\d+}}, #0
+  /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
   /// CHECK-NOT:            eor{{(\.w)?}}
   /// CHECK:                eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
   /// CHECK-NEXT:           eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
@@ -242,7 +242,7 @@
 
   // Note: No support for partial long constant embedding.
   /// CHECK-START-ARM: long Main.xor0xfffffff00000000f(long) disassembly (after)
-  /// CHECK-DAG:            movs {{r\d+}}, #15
+  /// CHECK-DAG:            mov{{s?}} {{r\d+}}, #15
   /// CHECK-DAG:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            eor{{(\.w)?}}
   /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
@@ -257,8 +257,8 @@
   /// CHECK-NOT:            movs {{r\d+}}, #15
   /// CHECK-NOT:            mov.w {{r\d+}}, #-268435456
   /// CHECK-NOT:            eor{{(\.w)?}}
-  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #15
-  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #4026531840
+  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #0xf
+  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #0xf0000000
   /// CHECK-NOT:            eor{{(\.w)?}}
 
   public static long xor0xf00000000000000f(long arg) {
@@ -507,7 +507,7 @@
   /// CHECK:     <<Arg:j\d+>>       ParameterValue
   /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
   /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
-  /// CHECK-NEXT:                   subs r{{\d+}}, #1
+  /// CHECK-NEXT:                   {{adds|subs}} r{{\d+}}, #{{4294967295|1}}
   /// CHECK-NEXT:                   adc r{{\d+}}, r{{\d+}}, #4294967295
   /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
   /// CHECK-NEXT:                   adds r{{\d+}}, #1
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 5557c7b..f50e01e 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -26,18 +26,18 @@
 # Test a case when one entering TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
@@ -71,18 +71,18 @@
 # Test a case when one exiting TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -117,21 +117,21 @@
 # Test that a catch block remains live and consistent if some of try blocks
 # throwing into it are removed.
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -203,7 +203,7 @@
 
 # Test that DCE removes catch phi uses of instructions defined in dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
@@ -220,7 +220,7 @@
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
 ## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
@@ -277,7 +277,7 @@
 # Test that DCE does not remove catch phi uses of instructions defined outside
 # dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
@@ -287,7 +287,7 @@
 ## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
index 19587e7..0d7596a 100644
--- a/test/543-checker-dce-trycatch/src/Main.java
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -35,10 +35,10 @@
   // where TryBoundary still has exception handler successors after having removed
   // some already.
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT: TryBoundary
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$after_inlining (after)
   /// CHECK: begin_block
   /// CHECK: begin_block
   /// CHECK: begin_block
@@ -63,4 +63,4 @@
   public static void main(String[] args) {
 
   }
-}
\ No newline at end of file
+}
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 3c053cf..9e475ab 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -252,27 +252,27 @@
   /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
@@ -303,10 +303,6 @@
   /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:BssEntry
 
-  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_mips (after)
-  /// CHECK-DAG:            MipsComputeBaseMethodAddress
-  /// CHECK-DAG:            LoadString load_kind:BssEntry
-
   public static String $noinline$getNonBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 5d4aa56..af43973 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -196,7 +196,7 @@
   const-class v0, LMain;
   if-ne v0, v2, :exit
   :other_loop_entry
-  const-class v1, LIrreducibleLoop;
+  const-class v1, Ljava/lang/Class;  # LoadClass that can throw
   goto :loop_entry
   :exit
   return-object v0
@@ -250,7 +250,7 @@
   const/4 v0, 0
   if-ne p0, v0, :other_loop_entry
   :loop_entry
-  const-class v1, LIrreducibleLoop;
+  const-class v1, Ljava/lang/Class;  # LoadClass that can throw
   if-ne v0, p0, :exit
   :other_loop_entry
   sub-int v1, p0, p0
@@ -286,7 +286,7 @@
 .method public static licm3(III)I
   .registers 4
   :loop_entry
-  const-class v0, LIrreducibleLoop;
+  const-class v0, Ljava/lang/Class;  # LoadClass that can throw
   if-ne p1, p2, :exit
   goto :loop_body
 
diff --git a/test/562-no-intermediate/expected.txt b/test/562-checker-no-intermediate/expected.txt
similarity index 100%
rename from test/562-no-intermediate/expected.txt
rename to test/562-checker-no-intermediate/expected.txt
diff --git a/test/562-checker-no-intermediate/info.txt b/test/562-checker-no-intermediate/info.txt
new file mode 100644
index 0000000..38f1f65
--- /dev/null
+++ b/test/562-checker-no-intermediate/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, checking that there is no
+intermediate address live across a Java call.
diff --git a/test/562-checker-no-intermediate/src/Main.java b/test/562-checker-no-intermediate/src/Main.java
new file mode 100644
index 0000000..104ba8b
--- /dev/null
+++ b/test/562-checker-no-intermediate/src/Main.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /**
+   * Check that the intermediate address computation is not reordered or merged
+   * across the call to Math.abs().
+   */
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (before)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:                                  ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) GVN$after_arch (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:                                  ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) GVN$after_arch (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  public static void main(String[] args) {
+    array[index] += Math.abs(-42);
+  }
+
+  static int index = 0;
+  static int[] array = new int[2];
+}
diff --git a/test/562-no-intermediate/info.txt b/test/562-no-intermediate/info.txt
deleted file mode 100644
index 4f21aeb..0000000
--- a/test/562-no-intermediate/info.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Regression test for optimizing, checking that there is no
-intermediate address between a Java call.
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 50e8382..8eca6b2 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -43,7 +43,7 @@
           Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
       if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
         in_osr_method_ = true;
-      } else if (IsCurrentFrameInInterpreter()) {
+      } else if (IsShadowFrame()) {
         in_interpreter_ = true;
       }
       return false;
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 8af3894..4de5634 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -17,26 +17,6 @@
 public class Main {
   public static void main(String[] args) {
     System.loadLibrary(args[0]);
-    Thread testThread = new Thread() {
-      public void run() {
-        performTest();
-      }
-    };
-    testThread.start();
-    try {
-      testThread.join(20 * 1000);  // 20s timeout.
-    } catch (InterruptedException ie) {
-      System.out.println("Interrupted.");
-      System.exit(1);
-    }
-    Thread.State state = testThread.getState();
-    if (state != Thread.State.TERMINATED) {
-      System.out.println("Test timed out, current state: " + state);
-      System.exit(1);
-    }
-  }
-
-  public static void performTest() {
     new SubMain();
     if ($noinline$returnInt() != 53) {
       throw new Error("Unexpected return value");
diff --git a/test/586-checker-null-array-get/src/Main.java b/test/586-checker-null-array-get/src/Main.java
index e0782bc..0ea7d34 100644
--- a/test/586-checker-null-array-get/src/Main.java
+++ b/test/586-checker-null-array-get/src/Main.java
@@ -100,7 +100,7 @@
   /// CHECK-DAG:                     Return [<<ArrayGet2>>]
   public static float test1() {
     Test1 test1 = getNullTest1();
-    Test2 test2 = getNullTest2();;
+    Test2 test2 = getNullTest2();
     int[] iarr = test1.iarr;
     float[] farr = test2.farr;
     iarr[0] = iarr[1];
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
index 7dac007..c1d75ec 100644
--- a/test/611-checker-simplify-if/src/Main.java
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -64,13 +64,13 @@
 
   // Test when the phi is the input of the if.
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (before)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                   If
   /// CHECK-DAG: <<Phi:i\d+>>      Phi
   /// CHECK-DAG:                   If [<<Phi>>]
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$after_inlining (after)
   /// CHECK:      If
   /// CHECK-NOT:  Phi
   /// CHECK-NOT:  If
@@ -144,7 +144,7 @@
   /// CHECK-NOT:                          GreaterThanOrEqual
   /// CHECK-NOT:                          If
   public static void testGreaterCondition(String[] args) {
-    int a = 42;;
+    int a = 42;
     if (args.length == 42) {
       a = 34;
     } else {
diff --git a/test/616-cha/expected.txt b/test/616-cha/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/616-cha/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha/info.txt b/test/616-cha/info.txt
new file mode 100644
index 0000000..50e3b0d
--- /dev/null
+++ b/test/616-cha/info.txt
@@ -0,0 +1 @@
+Test for Class Hierarchy Analysis (CHA).
diff --git a/test/616-cha/src/Main.java b/test/616-cha/src/Main.java
new file mode 100644
index 0000000..787318d
--- /dev/null
+++ b/test/616-cha/src/Main.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main1 {
+  String getName() {
+    return "Main1";
+  }
+
+  void printError(String msg) {
+    System.out.println(msg);
+  }
+
+  void foo(int i) {
+    if (i != 1) {
+      printError("error1");
+    }
+  }
+
+  int getValue1() {
+    return 1;
+  }
+  int getValue2() {
+    return 2;
+  }
+  int getValue3() {
+    return 3;
+  }
+  int getValue4() {
+    return 4;
+  }
+  int getValue5() {
+    return 5;
+  }
+  int getValue6() {
+    return 6;
+  }
+}
+
+class Main2 extends Main1 {
+  String getName() {
+    return "Main2";
+  }
+
+  void foo(int i) {
+    if (i != 2) {
+      printError("error2");
+    }
+  }
+}
+
+class Main3 extends Main1 {
+  String getName() {
+    return "Main3";
+  }
+}
+
+public class Main {
+  static Main1 sMain1;
+  static Main1 sMain2;
+
+  static boolean sIsOptimizing = true;
+  static boolean sHasJIT = true;
+  static volatile boolean sOtherThreadStarted;
+
+  // sMain1.foo() will always be Main1.foo() before Main2 is loaded/linked.
+  // So sMain1.foo() can be devirtualized to Main1.foo() and be inlined.
+  // After Dummy.createMain2() which links in Main2, live testOverride() on stack
+  // should be deoptimized.
+  static void testOverride(boolean createMain2, boolean wait, boolean setHasJIT) {
+    if (setHasJIT) {
+      if (isInterpreted()) {
+        sHasJIT = false;
+      }
+      return;
+    }
+
+    if (createMain2 && (sIsOptimizing || sHasJIT)) {
+      assertIsManaged();
+    }
+
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+
+    if (createMain2) {
+      // Wait for the other thread to start.
+      while (!sOtherThreadStarted);
+      // Create a Main2 instance and assign it to sMain2.
+      // sMain1 is kept the same.
+      sMain2 = Dummy.createMain2();
+      // Wake up the other thread.
+      synchronized(Main.class) {
+        Main.class.notify();
+      }
+    } else if (wait) {
+      // This is the other thread.
+      synchronized(Main.class) {
+        sOtherThreadStarted = true;
+        // Wait for Main2 to be linked and deoptimization to be triggered.
+        try {
+          Main.class.wait();
+        } catch (Exception e) {
+        }
+      }
+    }
+
+    // There should be a deoptimization here right after Main2 is linked by
+    // calling Dummy.createMain2(), even though sMain1 didn't change.
+    // The behavior here would be different if inline-cache is used, which
+    // doesn't deoptimize since sMain1 still hits the type cache.
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+    if ((createMain2 || wait) && sHasJIT && !sIsOptimizing) {
+      // This method should be deoptimized right after Main2 is created.
+      assertIsInterpreted();
+    }
+
+    if (sMain2 != null) {
+      sMain2.foo(sMain2.getClass() == Main1.class ? 1 : 2);
+    }
+  }
+
+  static Main1[] sArray;
+
+  static long calcValue(Main1 m) {
+    return m.getValue1()
+        + m.getValue2() * 2
+        + m.getValue3() * 3
+        + m.getValue4() * 4
+        + m.getValue5() * 5
+        + m.getValue6() * 6;
+  }
+
+  static long testNoOverrideLoop(int count) {
+    long sum = 0;
+    for (int i=0; i<count; i++) {
+      sum += calcValue(sArray[0]);
+      sum += calcValue(sArray[1]);
+      sum += calcValue(sArray[2]);
+    }
+    return sum;
+  }
+
+  static void testNoOverride() {
+    sArray = new Main1[3];
+    sArray[0] = new Main1();
+    sArray[1] = Dummy.createMain2();
+    sArray[2] = Dummy.createMain3();
+    long sum = 0;
+    // Loop enough to get methods JITed.
+    for (int i=0; i<100; i++) {
+      testNoOverrideLoop(1);
+    }
+    ensureJitCompiled(Main.class, "testNoOverrideLoop");
+    ensureJitCompiled(Main.class, "calcValue");
+
+    long t1 = System.currentTimeMillis();
+    sum = testNoOverrideLoop(100000);
+    long t2 = System.currentTimeMillis();
+    if (sum != 27300000L) {
+      System.out.println("Unexpected result.");
+    }
+  }
+
+  private static void assertSingleImplementation(Class<?> clazz, String method_name, boolean b) {
+    if (hasSingleImplementation(clazz, method_name) != b) {
+      System.out.println(clazz + "." + method_name +
+          " doesn't have single implementation value of " + b);
+    }
+  }
+
+  // Test scenarios under which CHA-based devirtualization happens,
+  // and class loading that overrides a method can invalidate compiled code.
+  // Also test the pure non-overriding case, which is mainly for checking the
+  // form of the generated code.
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    // Check some boot-image methods.
+    assertSingleImplementation(java.util.ArrayList.class, "size", true);
+    // java.util.LinkedHashMap overrides get().
+    assertSingleImplementation(java.util.HashMap.class, "get", false);
+
+    // We don't set single-implementation modifier bit for final classes or methods
+    // since we can devirtualize without CHA for those cases. However hasSingleImplementation()
+    // should return true for those cases.
+    assertSingleImplementation(java.lang.String.class, "charAt", true);
+    assertSingleImplementation(java.lang.Thread.class, "join", true);
+    // We don't set single-implementation modifier bit for native methods.
+    assertSingleImplementation(java.lang.Thread.class, "isInterrupted", false);
+
+    if (isInterpreted()) {
+      sIsOptimizing = false;
+    }
+
+    // sMain1 is an instance of Main1. Main2 hasn't been loaded yet.
+    sMain1 = new Main1();
+
+    // Loop enough to get testOverride() JITed.
+    for (int i=0; i<100; i++) {
+      testOverride(false, false, false);
+    }
+
+    ensureJitCompiled(Main.class, "testOverride");
+    testOverride(false, false, true);
+
+    if (sHasJIT && !sIsOptimizing) {
+      assertSingleImplementation(Main1.class, "foo", true);
+    } else {
+      // Main2 is verified ahead-of-time so it's linked in already.
+    }
+    assertSingleImplementation(Main1.class, "getValue1", true);
+
+    // Create another thread that also calls sMain1.foo().
+    // Try to test suspending and deoptimizing another thread.
+    new Thread() {
+      public void run() {
+        testOverride(false, true, false);
+      }
+    }.start();
+
+    // This will create a Main2 instance in the middle of testOverride().
+    testOverride(true, false, false);
+    assertSingleImplementation(Main1.class, "foo", false);
+    assertSingleImplementation(Main1.class, "getValue1", true);
+
+    testNoOverride();
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+  private static native void assertIsInterpreted();
+  private static native void assertIsManaged();
+  private static native boolean isInterpreted();
+  private static native boolean hasSingleImplementation(Class<?> clazz, String method_name);
+}
+
+// Do it in another class to avoid class loading due to verifier.
+class Dummy {
+  static Main1 createMain2() {
+    return new Main2();
+  }
+  static Main1 createMain3() {
+    return new Main3();
+  }
+}
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index d8bc611..f85479a 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -92,6 +92,43 @@
     }
   }
 
+  /// CHECK-START: void Main.deadConditional(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: void Main.deadConditional(int) loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}}
+  public static void deadConditional(int n) {
+    int k = 0;
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      if (i == 3)
+        k = i;
+      else
+        m = i;
+    }
+  }
+
+  /// CHECK-START: void Main.deadConditionalCycle(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.deadConditionalCycle(int) loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}}
+  public static void deadConditionalCycle(int n) {
+    int k = 0;
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      if (i == 3)
+        k--;
+      else
+        m++;
+    }
+  }
+
+
   /// CHECK-START: void Main.deadInduction() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
@@ -668,6 +705,8 @@
     potentialInfiniteLoop(4);
     deadNestedLoops();
     deadNestedAndFollowingLoops();
+    deadConditional(4);
+    deadConditionalCycle(4);
 
     deadInduction();
     for (int i = 0; i < a.length; i++) {
diff --git a/test/620-checker-bce-intrinsics/expected.txt b/test/620-checker-bce-intrinsics/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/620-checker-bce-intrinsics/info.txt b/test/620-checker-bce-intrinsics/info.txt
new file mode 100644
index 0000000..a868845
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/info.txt
@@ -0,0 +1 @@
+Test on bounds check elimination in loops using intrinsics.
diff --git a/test/620-checker-bce-intrinsics/src/Main.java b/test/620-checker-bce-intrinsics/src/Main.java
new file mode 100644
index 0000000..afc3c65
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/src/Main.java
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests on bounds check elimination in loops that use intrinsics.
+ * All bounds checks below should be statically eliminated.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.oneArray(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  //
+  /// CHECK-START: int Main.oneArray(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArray(int[] a) {
+    int x = 0;
+    for (int i = 0; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.oneArrayAbs(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  //
+  /// CHECK-START: int Main.oneArrayAbs(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArrayAbs(int[] a, int lo) {
+    int x = 0;
+    for (int i = Math.abs(lo); i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+
+  /// CHECK-START: int Main.twoArrays(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.twoArrays(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int twoArrays(int[] a, int[] b) {
+    int x = 0;
+    for (int i = 0; i < Math.min(a.length, b.length); i++) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.threeArrays(int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.threeArrays(int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int threeArrays(int[] a, int[] b, int[] c) {
+    int x = 0;
+    for (int i = 0; i < Math.min(Math.min(a.length, b.length), c.length); i++) {
+      x += a[i] + b[i] + c[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.fourArrays(int[], int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.fourArrays(int[], int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int fourArrays(int[] a, int[] b, int[] c, int[] d) {
+    int x = 0;
+    for (int i = 0; i < Math.min(Math.min(a.length, b.length), Math.min(c.length, d.length)); i++) {
+      x += a[i] + b[i] + c[i] + d[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.oneArrayWithCleanup(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.oneArrayWithCleanup(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArrayWithCleanup(int[] a) {
+    int x = 0;
+    int n = Math.min(4, a.length);
+    for (int i = 0; i < n; i++) {
+      x += a[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i] * 10;
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.twoArraysWithCleanup(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.twoArraysWithCleanup(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int twoArraysWithCleanup(int[] a, int[] b) {
+    int x = 0;
+    int n = Math.min(a.length, b.length);
+    for (int i = n - 1; i >= 0; i--) {
+      x += a[i] + b[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.threeArraysWithCleanup(int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.threeArraysWithCleanup(int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int threeArraysWithCleanup(int[] a, int[] b, int[] c) {
+    int x = 0;
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = n - 1; i >= 0; i--) {
+      x += a[i] + b[i] + c[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.altLoopLogic(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.altLoopLogic(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int altLoopLogic(int[] a, int[] b) {
+    int x = 0;
+    int n = Math.min(a.length, b.length);
+    for (int i = n; i-- > 0;) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.hiddenMin(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.hiddenMin(int[], int[]) BCE (after)
+  //
+  // TODO: make this so (BCE should also eliminate the bounds checks in this loop).
+  static int hiddenMin(int[] a, int[] b) {
+    int x = 0;
+    for (int i = 0; i < a.length && i < b.length; i++) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.hiddenMinWithCleanup(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.hiddenMinWithCleanup(int[], int[]) BCE (after)
+  //
+  // TODO: make this so (BCE should also eliminate the bounds checks in these loops).
+  static int hiddenMinWithCleanup(int[] a, int[] b) {
+    int x = 0;
+    int i = 0;
+    for (; i < a.length && i < b.length; i++) {
+      x += a[i] + b[i];
+    }
+    for (; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    int[] a = { 1, 2, 3, 4, 5 };
+    int[] b = { 6, 7, 8, 9, 4, 2 };
+    int[] c = { 1, 2, 3 };
+    int[] d = { 8, 5, 3, 2 };
+
+    expectEquals(15, oneArray(a));
+    expectEquals(36, oneArray(b));
+    expectEquals(6,  oneArray(c));
+    expectEquals(18, oneArray(d));
+
+    expectEquals(5,  oneArrayAbs(a, -4));
+    expectEquals(15, oneArrayAbs(a, 0));
+    expectEquals(5,  oneArrayAbs(a, 4));
+
+    expectEquals(30, twoArrays(a, a));
+    expectEquals(49, twoArrays(a, b));
+    expectEquals(12, twoArrays(a, c));
+    expectEquals(28, twoArrays(a, d));
+
+    expectEquals(45, threeArrays(a, a, a));
+    expectEquals(33, threeArrays(a, b, c));
+    expectEquals(58, threeArrays(a, b, d));
+    expectEquals(28, threeArrays(a, c, d));
+
+    expectEquals(60, fourArrays(a, a, a, a));
+    expectEquals(49, fourArrays(a, b, c, d));
+
+    expectEquals(60, oneArrayWithCleanup(a));
+    expectEquals(90, oneArrayWithCleanup(b));
+    expectEquals(6,  oneArrayWithCleanup(c));
+    expectEquals(18, oneArrayWithCleanup(d));
+
+    expectEquals(30, twoArraysWithCleanup(a, a));
+    expectEquals(49, twoArraysWithCleanup(a, b));
+    expectEquals(21, twoArraysWithCleanup(a, c));
+    expectEquals(33, twoArraysWithCleanup(a, d));
+
+    expectEquals(45, threeArraysWithCleanup(a, a, a));
+    expectEquals(42, threeArraysWithCleanup(a, b, c));
+    expectEquals(63, threeArraysWithCleanup(a, b, d));
+    expectEquals(37, threeArraysWithCleanup(a, c, d));
+
+    expectEquals(30, altLoopLogic(a, a));
+    expectEquals(49, altLoopLogic(a, b));
+    expectEquals(12, altLoopLogic(a, c));
+    expectEquals(28, altLoopLogic(a, d));
+
+    expectEquals(30, hiddenMin(a, a));
+    expectEquals(49, hiddenMin(a, b));
+    expectEquals(12, hiddenMin(a, c));
+    expectEquals(28, hiddenMin(a, d));
+
+    expectEquals(30, hiddenMinWithCleanup(a, a));
+    expectEquals(49, hiddenMinWithCleanup(a, b));
+    expectEquals(21, hiddenMinWithCleanup(a, c));
+    expectEquals(33, hiddenMinWithCleanup(a, d));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/622-checker-bce-regressions/expected.txt b/test/622-checker-bce-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/622-checker-bce-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/622-checker-bce-regressions/info.txt b/test/622-checker-bce-regressions/info.txt
new file mode 100644
index 0000000..a753dfa
--- /dev/null
+++ b/test/622-checker-bce-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on BCE.
diff --git a/test/622-checker-bce-regressions/src/Main.java b/test/622-checker-bce-regressions/src/Main.java
new file mode 100644
index 0000000..6ba2644
--- /dev/null
+++ b/test/622-checker-bce-regressions/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for BCE.
+ */
+public class Main {
+
+  static int[] array = new int[10];
+
+  /// CHECK-START: int Main.doNotVisitAfterForwardBCE(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doNotVisitAfterForwardBCE(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  static int doNotVisitAfterForwardBCE(int[] a) {
+    if (a == null) {
+      throw new Error("Null");
+    }
+    int k = 0;
+    int j = 0;
+    for (int i = 1; i < 10; i++) {
+      j = i - 1;
+      // b/32547652: after DCE, bounds checks become consecutive,
+      // and second should not be revisited after forward BCE.
+      k = a[i] + a[i - 1];
+    }
+    return j;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(8, doNotVisitAfterForwardBCE(array));
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-no-intermediate/expected.txt b/test/622-simplifyifs-exception-edges/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/622-simplifyifs-exception-edges/expected.txt
diff --git a/test/622-simplifyifs-exception-edges/info.txt b/test/622-simplifyifs-exception-edges/info.txt
new file mode 100644
index 0000000..58c4bfb
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/info.txt
@@ -0,0 +1,2 @@
+Regression test for the SimplifyIfs() graph simplification erroneously trying
+to redirect exception handler edges.
\ No newline at end of file
diff --git a/test/622-simplifyifs-exception-edges/smali/Test.smali b/test/622-simplifyifs-exception-edges/smali/Test.smali
new file mode 100644
index 0000000..5e91258
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/smali/Test.smali
@@ -0,0 +1,76 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static test([I)I
+    .locals 2
+    const/4 v0, 0
+    :try1_begin
+    array-length v1, p0
+    :try1_end
+    add-int/lit8 v0, v1, -1
+    :try2_begin
+    aget v0, p0, v0
+    :try2_end
+    :end
+    return v0
+
+    :catch_all
+    # Regression test for bug 32545860:
+    #     SimplifyIfs() would have redirected exception handler edges leading here.
+    # Note: There is no move-exception here to prevent matching the SimplifyIfs() pattern.
+    if-eqz v0, :is_zero
+    const/4 v0, -1
+    goto :end
+    :is_zero
+    const/4 v0, -2
+    goto :end
+
+    .catchall {:try1_begin .. :try1_end } :catch_all
+    .catchall {:try2_begin .. :try2_end } :catch_all
+.end method
+
+.method public static test2([II)I
+    .locals 3
+    move v0, p1
+    :try_begin
+    array-length v1, p0
+    add-int/lit8 v1, v1, -1
+    add-int/lit8 v0, v0, 1
+    aget v1, p0, v1
+    const/4 v0, 2
+    aget v2, p0, p1
+    const/4 v0, 3
+    :try_end
+    :end
+    return v0
+
+    :catch_all
+    # Regression test for bug 32546110:
+    #     SimplifyIfs() would have looked at predecessors of this block based on the indexes
+    #     of the catch Phi's inputs. For catch blocks these two arrays are unrelated, so
+    #     this caused out-of-range access triggering a DCHECK() in dchecked_vector<>.
+    # Note: There is no move-exception here to prevent matching the SimplifyIfs() pattern.
+    if-eqz v0, :is_zero
+    const/4 v0, -1
+    goto :end
+    :is_zero
+    const/4 v0, -2
+    goto :end
+
+    .catchall {:try_begin .. :try_end } :catch_all
+.end method
diff --git a/test/622-simplifyifs-exception-edges/src/Main.java b/test/622-simplifyifs-exception-edges/src/Main.java
new file mode 100644
index 0000000..636f047
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+        Class<?> c = Class.forName("Test");
+        Method test = c.getDeclaredMethod("test", int[].class);
+        assertIntEquals(-2, (int)test.invoke(null, new Object[] { null }));
+        assertIntEquals(-1, (int)test.invoke(null, new Object[] { new int[0] }));
+        assertIntEquals(42, (int)test.invoke(null, new Object[] { new int[] { 42 } }));
+
+        Method test2 = c.getDeclaredMethod("test2", int[].class, int.class);
+        assertIntEquals(-2, (int)test2.invoke(null, new Object[] { null, 0 }));
+        assertIntEquals(-1, (int)test2.invoke(null, new Object[] { new int[0], 0 }));
+        assertIntEquals(-1, (int)test2.invoke(null, new Object[] { new int[0], 1 }));
+        assertIntEquals(3, (int)test2.invoke(null, new Object[] { new int[] { 42 }, 0 }));
+    }
+
+    public static void assertIntEquals(int expected, int result) {
+        if (expected != result) {
+            throw new Error("Expected: " + expected + ", found: " + result);
+        }
+    }
+
+    // Workaround for non-zero field ids offset in dex file with no fields. Bug: 18051191
+    static final boolean dummy = false;
+}
diff --git a/test/623-checker-loop-regressions/expected.txt b/test/623-checker-loop-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/623-checker-loop-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/623-checker-loop-regressions/info.txt b/test/623-checker-loop-regressions/info.txt
new file mode 100644
index 0000000..6271600
--- /dev/null
+++ b/test/623-checker-loop-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on loop optimizations.
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
new file mode 100644
index 0000000..ce5bda1
--- /dev/null
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for loop optimizations.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.earlyExitFirst(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitFirst(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  static int earlyExitFirst(int m) {
+    int k = 0;
+    for (int i = 0; i < 10; i++) {
+      if (i == m) {
+        return k;
+      }
+      k++;
+    }
+    return k;
+  }
+
+  /// CHECK-START: int Main.earlyExitLast(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitLast(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  static int earlyExitLast(int m) {
+    int k = 0;
+    for (int i = 0; i < 10; i++) {
+      k++;
+      if (i == m) {
+        return k;
+      }
+    }
+    return k;
+  }
+
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+  /// CHECK-DAG: Phi loop:<<Loop2>>      outer_loop:<<Loop1>>
+  //
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:{{B\d+}}
+  static int earlyExitNested() {
+    int offset = 0;
+    for (int i = 0; i < 2; i++) {
+      int start = offset;
+      // This loop can be removed.
+      for (int j = 0; j < 2; j++) {
+        offset++;
+      }
+      if (i == 1) {
+        return start;
+      }
+    }
+    return 0;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(10, earlyExitFirst(-1));
+    for (int i = 0; i <= 10; i++) {
+      expectEquals(i, earlyExitFirst(i));
+    }
+    expectEquals(10, earlyExitFirst(11));
+
+    expectEquals(10, earlyExitLast(-1));
+    for (int i = 0; i < 10; i++) {
+      expectEquals(i + 1, earlyExitLast(i));
+    }
+    expectEquals(10, earlyExitLast(10));
+    expectEquals(10, earlyExitLast(11));
+
+    expectEquals(2, earlyExitNested());
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/624-checker-stringops/expected.txt b/test/624-checker-stringops/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/624-checker-stringops/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/624-checker-stringops/info.txt b/test/624-checker-stringops/info.txt
new file mode 100644
index 0000000..64344ac
--- /dev/null
+++ b/test/624-checker-stringops/info.txt
@@ -0,0 +1 @@
+Verify some properties of string operations represented by intrinsics.
diff --git a/test/624-checker-stringops/src/Main.java b/test/624-checker-stringops/src/Main.java
new file mode 100644
index 0000000..d965e3f
--- /dev/null
+++ b/test/624-checker-stringops/src/Main.java
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests properties of some string operations represented by intrinsics.
+ */
+public class Main {
+
+  static final String ABC = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+  static final String XYZ = "XYZ";
+
+  //
+  // Variant intrinsics remain in the loop, but invariant references are hoisted out of the loop.
+  //
+  /// CHECK-START: int Main.liveIndexOf() licm (before)
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOf            loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOfAfter       loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOf      loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: int Main.liveIndexOf() licm (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOf            loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOfAfter       loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOf      loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:none
+  static int liveIndexOf() {  // sums all four indexOf variants so every result is used
+    int k = ABC.length() + XYZ.length();  // does LoadString before loops
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(c);  // argument varies with c
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(c, 4);  // argument varies with c
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(XYZ);  // receiver and argument do not depend on c
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(XYZ, 2);  // receiver and arguments do not depend on c
+    }
+    return k;
+  }
+
+  //
+  // All dead intrinsics can be removed completely.
+  //
+  /// CHECK-START: int Main.deadIndexOf() dead_code_elimination$initial (before)
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOf            loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOfAfter       loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOf      loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: int Main.deadIndexOf() dead_code_elimination$initial (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringIndexOf
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringIndexOfAfter
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOf
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int deadIndexOf() {  // same calls as liveIndexOf(), but no indexOf result is used
+    int k = ABC.length() + XYZ.length();  // does LoadString before loops
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(c);  // d is never read
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(c, 4);  // d is never read
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(XYZ);  // d is never read
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(XYZ, 2);  // d is never read
+    }
+    return k;  // only the two length() results contribute
+  }
+
+  //
+  // Explicit null check on receiver, implicit null check on argument prevents hoisting.
+  //
+  /// CHECK-START: int Main.indexOfExceptions(java.lang.String, java.lang.String) licm (after)
+  /// CHECK-DAG: <<String:l\d+>> NullCheck                                                         loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:                 InvokeVirtual [<<String>>,{{l\d+}}] intrinsic:StringStringIndexOf loop:<<Loop>>      outer_loop:none
+  static int indexOfExceptions(String s, String t) {  // main() passes null for s, then for t
+    int k = 0;
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += s.indexOf(t);  // c is unused: the same call repeats 26 times
+    }
+    return k;
+  }
+
+  //
+  // Allows combining of returned "this". Also ensures that similar looking append() calls
+  // are not combined somehow through returned result.
+  //
+  /// CHECK-START: int Main.bufferLen2() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>]   intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<Append1>>]
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null1>>,<<String2>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append2>>]
+  /// CHECK-DAG:                  InvokeVirtual [<<Null2>>]             intrinsic:StringBufferLength
+  //
+  /// CHECK-START: int Main.bufferLen2() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBufferLength
+  static int bufferLen2() {  // length after two chained appends on a fresh StringBuffer
+    StringBuffer s = new StringBuffer();
+    return s.append("x").append("x").length();  // main() expects 2
+  }
+
+  //
+  // Allows combining of returned "this". Also ensures that similar looking append() calls
+  // are not combined somehow through returned result.
+  //
+  /// CHECK-START: int Main.builderLen2() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>]   intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]
+  /// CHECK-DAG:                  InvokeVirtual [<<Null3>>]             intrinsic:StringBuilderLength
+  //
+  /// CHECK-START: int Main.builderLen2() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBuilderLength
+  static int builderLen2() {  // length after two chained appends on a fresh StringBuilder
+    StringBuilder s = new StringBuilder();
+    return s.append("x").append("x").length();  // main() expects 2
+  }
+
+  //
+  // Similar situation in a loop.
+  //
+  /// CHECK-START: int Main.bufferLoopAppender() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                         loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                          loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<New>>]                                             loop:<<Loop>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<Null1>>,<<String1>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<Null3>>,<<String3>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<Null4:l\d+>>   NullCheck     [<<New>>]                                             loop:none
+  /// CHECK-DAG:                  InvokeVirtual [<<Null4>>]             intrinsic:StringBufferLength  loop:none
+  //
+  /// CHECK-START: int Main.bufferLoopAppender() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                       loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                        loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<New>>,<<String3>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBufferLength  loop:none
+  static int bufferLoopAppender() {  // 10 iterations, three one-char appends each
+    StringBuffer b = new StringBuffer();
+    for (int i = 0; i < 10; i++) {
+      b.append("x").append("y").append("z");  // chained on the returned values
+    }
+    return b.length();  // main() expects 30
+  }
+
+  //
+  // Similar situation in a loop.
+  //
+  /// CHECK-START: int Main.builderLoopAppender() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                         loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                          loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<New>>]                                             loop:<<Loop>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<Null1>>,<<String1>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<Null3>>,<<String3>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<Null4:l\d+>>   NullCheck     [<<New>>]                                             loop:none
+  /// CHECK-DAG:                  InvokeVirtual [<<Null4>>]             intrinsic:StringBuilderLength loop:none
+  //
+  /// CHECK-START: int Main.builderLoopAppender() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                       loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                        loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<New>>,<<String3>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBuilderLength loop:none
+  static int builderLoopAppender() {  // 10 iterations, three one-char appends each
+    StringBuilder b = new StringBuilder();
+    for (int i = 0; i < 10; i++) {
+      b.append("x").append("y").append("z");  // chained on the returned values
+    }
+    return b.length();  // main() expects 30
+  }
+
+  //
+  // All calls in the loop-body and thus loop can be eliminated.
+  //
+  /// CHECK-START: int Main.bufferDeadLoop() instruction_simplifier (before)
+  /// CHECK-DAG: Phi                                              loop:<<Loop:B\d+>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringBufferToString     loop:<<Loop>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.bufferDeadLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringBufferToString
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int bufferDeadLoop() {  // nothing computed in the loop is used afterwards
+    StringBuffer b = new StringBuffer();
+    for (int i = 0; i < 10; i++) {
+      int d = b.toString().indexOf("x", 1);  // d is never read
+    }
+    return b.length();  // b is never appended to; main() expects 0
+  }
+
+  //
+  // All calls in the loop-body and thus loop can be eliminated.
+  //
+  /// CHECK-START: int Main.builderDeadLoop() instruction_simplifier (before)
+  /// CHECK-DAG: Phi                                              loop:<<Loop:B\d+>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringBuilderToString    loop:<<Loop>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.builderDeadLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringBuilderToString
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int builderDeadLoop() {  // nothing computed in the loop is used afterwards
+    StringBuilder b = new StringBuilder();
+    for (int i = 0; i < 10; i++) {
+      int d = b.toString().indexOf("x", 1);  // d is never read
+    }
+    return b.length();  // b is never appended to; main() expects 0
+  }
+
+  public static void main(String[] args) {  // prints "passed" only if no expectation throws
+    expectEquals(1865, liveIndexOf());  // 29 + 325 + 315 + 598 + 598
+    expectEquals(29, deadIndexOf());  // 26 + 3, the two string lengths
+
+    try {
+      indexOfExceptions(null, XYZ);  // null receiver
+      throw new Error("Expected: NPE");
+    } catch (NullPointerException e) {  // expected
+    }
+    try {
+      indexOfExceptions(ABC, null);  // null argument
+      throw new Error("Expected: NPE");
+    } catch (NullPointerException e) {  // expected
+    }
+    expectEquals(598, indexOfExceptions(ABC, XYZ));  // 26 * 23
+
+    expectEquals(2, bufferLen2());
+    expectEquals(2, builderLen2());
+    expectEquals(30, bufferLoopAppender());
+    expectEquals(30, builderLoopAppender());
+    expectEquals(0, bufferDeadLoop());
+    expectEquals(0, builderDeadLoop());
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {  // throws Error on mismatch
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/625-checker-licm-regressions/expected.txt b/test/625-checker-licm-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/625-checker-licm-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/625-checker-licm-regressions/info.txt b/test/625-checker-licm-regressions/info.txt
new file mode 100644
index 0000000..10480df
--- /dev/null
+++ b/test/625-checker-licm-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on LICM.
diff --git a/test/625-checker-licm-regressions/src/Main.java b/test/625-checker-licm-regressions/src/Main.java
new file mode 100644
index 0000000..f372b1c
--- /dev/null
+++ b/test/625-checker-licm-regressions/src/Main.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for LICM.
+ */
+public class Main {
+
+  static int sA;
+
+  //
+  // We cannot hoist the null check (can throw) above the field
+  // assignment (has write side effects) because that would result
+  // in throwing an exception before the assignment is done.
+  //
+  /// CHECK-START: void Main.foo(int[]) licm (before)
+  /// CHECK-DAG: LoadClass      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.foo(int[]) licm (after)
+  /// CHECK-DAG: LoadClass      loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.foo(int[]) licm (after)
+  /// CHECK-NOT: LoadClass      loop:{{B\d+}} outer_loop:none
+  static void foo(int[] arr) {  // main() only calls this with null
+    int j = 0;  // never modified in this method
+    do {
+      sA = 1;  // store precedes the arr.length access in the loop condition
+    } while (j < arr.length);  // throws NullPointerException when arr is null
+  }
+
+  //
+  // Similar situation as in foo(), but now a proper induction value
+  // is assigned to the field inside the do-while loop.
+  //
+  /// CHECK-START: void Main.bar(int[]) licm (before)
+  /// CHECK-DAG: LoadClass      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.bar(int[]) licm (after)
+  /// CHECK-DAG: LoadClass      loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.bar(int[]) licm (after)
+  /// CHECK-NOT: LoadClass      loop:{{B\d+}} outer_loop:none
+  static void bar(int[] arr) {  // leaves the final value of j in sA
+    int j = 0;
+    do {
+      j++;
+      sA = j;  // store precedes the arr.length access in the loop condition
+    } while (j < arr.length);  // throws NullPointerException when arr is null
+  }
+
+  //
+  // Similar situation as in bar(), but now an explicit catch
+  // statement may need the latest value of local j.
+  //
+  /// CHECK-START: int Main.catcher(int[]) licm (before)
+  /// CHECK-DAG: NullCheck   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.catcher(int[]) licm (after)
+  /// CHECK-DAG: NullCheck   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>      outer_loop:none
+  static int catcher(int[] arr) {  // returns the final j, negated if the loop threw NPE
+    int j = 0;
+    try {
+      do {
+        j++;  // runs at least once, so j >= 1 when arr.length is evaluated
+      } while (j < arr.length);
+    } catch (NullPointerException e) {
+      return -j;  // flag exception with negative value
+    }
+    return j;
+  }
+
+  public static void main(String[] args) {  // prints "passed" only if no expectation throws
+    sA = 0;
+    try {
+      foo(null);
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {  // expected
+    }
+    expectEquals(1, sA);  // the store happened before the NPE
+
+    sA = 0;
+    try {
+      bar(null);
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {  // expected
+    }
+    expectEquals(1, sA);  // the store happened before the NPE
+
+    for (int i = 0; i < 5; i++) {
+      sA = 0;
+      bar(new int[i]);
+      expectEquals(i == 0 ? 1 : i, sA);  // do-while body runs once even for length 0
+    }
+
+    expectEquals(-1, catcher(null));  // j was 1 when the NPE was thrown
+    for (int i = 0; i < 5; i++) {
+      expectEquals(i == 0 ? 1 : i, catcher(new int[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {  // throws Error on mismatch
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/626-set-resolved-string/expected.txt b/test/626-set-resolved-string/expected.txt
new file mode 100644
index 0000000..f4983b5
--- /dev/null
+++ b/test/626-set-resolved-string/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+foo
diff --git a/test/626-set-resolved-string/info.txt b/test/626-set-resolved-string/info.txt
new file mode 100644
index 0000000..e3a512f
--- /dev/null
+++ b/test/626-set-resolved-string/info.txt
@@ -0,0 +1,3 @@
+Test that even if Java code calls DexCache.setResolvedString and does
+not strongly intern the given string, the JIT will ensure that the
+strings it references are strongly interned.
diff --git a/test/626-set-resolved-string/src/Main.java b/test/626-set-resolved-string/src/Main.java
new file mode 100644
index 0000000..868b9d1
--- /dev/null
+++ b/test/626-set-resolved-string/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);  // library name is supplied as the first argument
+
+    // Get all methods. We cannot call getDeclaredMethod("foo") as
+    // that would make "foo" a strong root.
+    Method[] methods = Main.class.getDeclaredMethods();
+
+    // Call getName on the methods, which is implemented by using the dex
+    // cache and calling setResolvedString.
+    for (int i = 0; i < methods.length; i++) {
+      methods[i].getName();  // result discarded; only the call matters here
+    }
+
+    // Compile Main.foo. "foo" needs to be a strong root for JIT compilation.
+    // We stress test this:
+    //   - avoid strongly interning "foo" by doing "f" + "oo"
+    //   - call GC so that weaks can be collected.
+    //   - invoke foo() to make sure "foo" hasn't been collected.
+    ensureJitCompiled(Main.class, "f" + "oo");
+    Runtime.getRuntime().gc();
+    foo();
+  }
+
+  public static void foo() {
+    System.out.println("foo");
+  }
+
+  public static native void ensureJitCompiled(Class cls, String method_name);  // presumably provided by the library loaded in main()
+}
diff --git a/test/628-vdex/expected.txt b/test/628-vdex/expected.txt
new file mode 100644
index 0000000..d0f61f6
--- /dev/null
+++ b/test/628-vdex/expected.txt
@@ -0,0 +1,2 @@
+In foo
+In foo
diff --git a/test/562-no-intermediate/expected.txt b/test/628-vdex/info.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/628-vdex/info.txt
diff --git a/test/103-string-append/run b/test/628-vdex/run
old mode 100755
new mode 100644
similarity index 77%
rename from test/103-string-append/run
rename to test/628-vdex/run
index e27a622..f1b0a95
--- a/test/103-string-append/run
+++ b/test/628-vdex/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2012 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# As this is a performance test we always use the non-debug build.
-exec ${RUN} "${@/#libartd.so/libart.so}"
+exec ${RUN} --vdex "${@}"
diff --git a/test/562-no-intermediate/src/Main.java b/test/628-vdex/src/Main.java
similarity index 68%
rename from test/562-no-intermediate/src/Main.java
rename to test/628-vdex/src/Main.java
index 3b74d6f..7ceab2c 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/628-vdex/src/Main.java
@@ -15,13 +15,23 @@
  */
 
 public class Main {
-
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
+  Main() {
+    // Will be quickened with RETURN_VOID_NO_BARRIER.
   }
 
-  static int index = 0;
-  static double[] array = new double[2];
+  public static void main(String[] args) {
+    Main m = new Main();
+    Object o = m;
+    // The call and field accesses will be quickened.
+    m.foo(m.a);
+
+    // The checkcast will be quickened.
+    m.foo(((Main)o).a);
+  }
+
+  int a;
+  void foo(int a) {
+    System.out.println("In foo");
+  }
 }
+
diff --git a/test/902-hello-transformation/expected.txt b/test/902-hello-transformation/expected.txt
index e86e814..a826f93 100644
--- a/test/902-hello-transformation/expected.txt
+++ b/test/902-hello-transformation/expected.txt
@@ -1,3 +1,3 @@
-Hello
+hello
 modifying class 'Transform'
 Goodbye
diff --git a/test/902-hello-transformation/run b/test/902-hello-transformation/run
index 204e4cc..3755d1d 100755
--- a/test/902-hello-transformation/run
+++ b/test/902-hello-transformation/run
@@ -39,5 +39,6 @@
                    --experimental runtime-plugins \
                    --runtime-option -agentpath:${agent}=902-hello-transformation,${arg} \
                    --android-runtime-option -Xplugin:${plugin} \
+                   --android-runtime-option -Xfully-deoptable \
                    ${other_args} \
                    --args ${lib}
diff --git a/test/902-hello-transformation/src/Transform.java b/test/902-hello-transformation/src/Transform.java
index dc0a0c4..8e8af35 100644
--- a/test/902-hello-transformation/src/Transform.java
+++ b/test/902-hello-transformation/src/Transform.java
@@ -16,6 +16,13 @@
 
 class Transform {
   public void sayHi() {
-    System.out.println("Hello");
+    // Use lower 'h' to make sure the string will have a different string id
+    // than the transformation (the transformation code is the same except
+    // the actual printed String, which was making the test inaccurately pass
+    // in JIT mode when loading the string from the dex cache, as the string ids
+    // of the two different strings were the same).
+    // We know the string ids will be different because lexicographically:
+    // "Goodbye" < "LTransform;" < "hello".
+    System.out.println("hello");
   }
 }
diff --git a/test/902-hello-transformation/transform.cc b/test/902-hello-transformation/transform.cc
index 5b0d219..3369dd4 100644
--- a/test/902-hello-transformation/transform.cc
+++ b/test/902-hello-transformation/transform.cc
@@ -23,6 +23,7 @@
 #include "base/logging.h"
 #include "jni.h"
 #include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 #include "utils.h"
 
@@ -132,15 +133,13 @@
 jint OnLoad(JavaVM* vm,
             char* options,
             void* reserved ATTRIBUTE_UNUSED) {
-  jvmtiCapabilities caps;
   RuntimeIsJvm = (strcmp("jvm", options) == 0);
   if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   if (IsJVM()) {
-    jvmti_env->GetPotentialCapabilities(&caps);
-    jvmti_env->AddCapabilities(&caps);
     jvmtiEventCallbacks cbs;
     memset(&cbs, 0, sizeof(cbs));
     cbs.ClassFileLoadHook = transformationHook;
diff --git a/test/903-hello-tagging/tagging.cc b/test/903-hello-tagging/tagging.cc
index bed4e5d..1557d45 100644
--- a/test/903-hello-tagging/tagging.cc
+++ b/test/903-hello-tagging/tagging.cc
@@ -28,6 +28,7 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 #include "utils.h"
 
@@ -145,6 +146,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/904-object-allocation/tracking.cc b/test/904-object-allocation/tracking.cc
index 57bfed5..9261a9f 100644
--- a/test/904-object-allocation/tracking.cc
+++ b/test/904-object-allocation/tracking.cc
@@ -26,6 +26,7 @@
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 #include "utils.h"
 
@@ -95,6 +96,7 @@
     return 1;
   }
   jvmti_env->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_OBJECT_ALLOC, nullptr);
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/905-object-free/tracking_free.cc b/test/905-object-free/tracking_free.cc
index b41a914..fc43acc 100644
--- a/test/905-object-free/tracking_free.cc
+++ b/test/905-object-free/tracking_free.cc
@@ -26,6 +26,7 @@
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 #include "utils.h"
 
@@ -87,6 +88,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
index ab1d8d8..8dac89d 100644
--- a/test/906-iterate-heap/iterate_heap.cc
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -25,6 +25,7 @@
 #include "jni.h"
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedPrimitiveArray.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
 namespace art {
@@ -180,6 +181,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/907-get-loaded-classes/get_loaded_classes.cc b/test/907-get-loaded-classes/get_loaded_classes.cc
index 0e09d1b..afbb774 100644
--- a/test/907-get-loaded-classes/get_loaded_classes.cc
+++ b/test/907-get-loaded-classes/get_loaded_classes.cc
@@ -27,6 +27,7 @@
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
 namespace art {
@@ -50,28 +51,14 @@
     return nullptr;
   }
 
-  ScopedLocalRef<jclass> obj_class(env, env->FindClass("java/lang/String"));
-  if (obj_class.get() == nullptr) {
-    return nullptr;
-  }
-
-  jobjectArray ret = env->NewObjectArray(count, obj_class.get(), nullptr);
-  if (ret == nullptr) {
-    return ret;
-  }
-
-  for (size_t i = 0; i < static_cast<size_t>(count); ++i) {
+  auto callback = [&](jint i) {
     jstring class_name = GetClassName(env, classes[i]);
-    env->SetObjectArrayElement(ret, static_cast<jint>(i), class_name);
-    env->DeleteLocalRef(class_name);
-  }
-
-  // Need to:
-  // 1) Free the local references.
-  // 2) Deallocate.
-  for (size_t i = 0; i < static_cast<size_t>(count); ++i) {
     env->DeleteLocalRef(classes[i]);
-  }
+    return class_name;
+  };
+  jobjectArray ret = CreateObjectArray(env, count, "java/lang/String", callback);
+
+  // Need to Deallocate.
   jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(classes));
 
   return ret;
@@ -85,6 +72,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/908-gc-start-finish/gc_callbacks.cc b/test/908-gc-start-finish/gc_callbacks.cc
index d546513..771d1ad 100644
--- a/test/908-gc-start-finish/gc_callbacks.cc
+++ b/test/908-gc-start-finish/gc_callbacks.cc
@@ -22,6 +22,7 @@
 #include "base/macros.h"
 #include "jni.h"
 #include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
 namespace art {
@@ -98,6 +99,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/910-methods/methods.cc b/test/910-methods/methods.cc
index cc6ad67..3ed91d7 100644
--- a/test/910-methods/methods.cc
+++ b/test/910-methods/methods.cc
@@ -23,6 +23,7 @@
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
 
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
 namespace art {
@@ -43,23 +44,16 @@
     return nullptr;
   }
 
-  ScopedLocalRef<jclass> obj_class(env, env->FindClass("java/lang/String"));
-  if (obj_class.get() == nullptr) {
-    return nullptr;
-  }
-
-  jobjectArray ret = env->NewObjectArray(3, obj_class.get(), nullptr);
-  if (ret == nullptr) {
-    return ret;
-  }
-
-  ScopedLocalRef<jstring> name_str(env, name == nullptr ? nullptr : env->NewStringUTF(name));
-  ScopedLocalRef<jstring> sig_str(env, sig == nullptr ? nullptr : env->NewStringUTF(sig));
-  ScopedLocalRef<jstring> gen_str(env, gen == nullptr ? nullptr : env->NewStringUTF(gen));
-
-  env->SetObjectArrayElement(ret, 0, name_str.get());
-  env->SetObjectArrayElement(ret, 1, sig_str.get());
-  env->SetObjectArrayElement(ret, 2, gen_str.get());
+  auto callback = [&](jint i) {
+    if (i == 0) {
+      return name == nullptr ? nullptr : env->NewStringUTF(name);
+    } else if (i == 1) {
+      return sig == nullptr ? nullptr : env->NewStringUTF(sig);
+    } else {
+      return gen == nullptr ? nullptr : env->NewStringUTF(gen);
+    }
+  };
+  jobjectArray ret = CreateObjectArray(env, 3, "java/lang/String", callback);
 
   // Need to deallocate the strings.
   if (name != nullptr) {
@@ -72,6 +66,15 @@
     jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(gen));
   }
 
+  // Also run GetMethodName with all parameter pointers null to check for segfaults.
+  jvmtiError result2 = jvmti_env->GetMethodName(id, nullptr, nullptr, nullptr);
+  if (result2 != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(result2, &err);
+    printf("Failure running GetMethodName(null, null, null): %s\n", err);
+    return nullptr;
+  }
+
   return ret;
 }
 
@@ -115,6 +118,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/911-get-stack-trace/stack_trace.cc b/test/911-get-stack-trace/stack_trace.cc
index da649cf..e7d9380 100644
--- a/test/911-get-stack-trace/stack_trace.cc
+++ b/test/911-get-stack-trace/stack_trace.cc
@@ -23,6 +23,7 @@
 #include "jni.h"
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
 namespace art {
@@ -33,39 +34,36 @@
   std::unique_ptr<jvmtiFrameInfo[]> frames(new jvmtiFrameInfo[max]);
 
   jint count;
-  jvmtiError result = jvmti_env->GetStackTrace(thread, start, max, frames.get(), &count);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetStackTrace: %s\n", err);
-    return nullptr;
+  {
+    jvmtiError result = jvmti_env->GetStackTrace(thread, start, max, frames.get(), &count);
+    if (result != JVMTI_ERROR_NONE) {
+      char* err;
+      jvmti_env->GetErrorName(result, &err);
+      printf("Failure running GetStackTrace: %s\n", err);
+      return nullptr;
+    }
   }
 
-  ScopedLocalRef<jclass> obj_class(env, env->FindClass("java/lang/String"));
-  if (obj_class.get() == nullptr) {
-    return nullptr;
-  }
-
-  jobjectArray ret = env->NewObjectArray(2 * count, obj_class.get(), nullptr);
-  if (ret == nullptr) {
-    return ret;
-  }
-
-  for (size_t i = 0; i < static_cast<size_t>(count); ++i) {
+  auto callback = [&](jint i) -> jstring {
+    size_t method_index = static_cast<size_t>(i) / 2;
     char* name;
     char* sig;
     char* gen;
-    jvmtiError result2 = jvmti_env->GetMethodName(frames[i].method, &name, &sig, &gen);
-    if (result2 != JVMTI_ERROR_NONE) {
-      char* err;
-      jvmti_env->GetErrorName(result, &err);
-      printf("Failure running GetMethodName: %s\n", err);
-      return nullptr;
+    {
+      jvmtiError result2 = jvmti_env->GetMethodName(frames[method_index].method, &name, &sig, &gen);
+      if (result2 != JVMTI_ERROR_NONE) {
+        char* err;
+        jvmti_env->GetErrorName(result2, &err);
+        printf("Failure running GetMethodName: %s\n", err);
+        return nullptr;
+      }
     }
-    ScopedLocalRef<jstring> trace_name(env, name == nullptr ? nullptr : env->NewStringUTF(name));
-    ScopedLocalRef<jstring> trace_sig(env, sig == nullptr ? nullptr : env->NewStringUTF(sig));
-    env->SetObjectArrayElement(ret, static_cast<jint>(2 * i), trace_name.get());
-    env->SetObjectArrayElement(ret, static_cast<jint>(2 * i + 1), trace_sig.get());
+    jstring callback_result;
+    if (i % 2 == 0) {
+      callback_result = name == nullptr ? nullptr : env->NewStringUTF(name);
+    } else {
+      callback_result = sig == nullptr ? nullptr : env->NewStringUTF(sig);
+    }
 
     if (name != nullptr) {
       jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
@@ -76,9 +74,9 @@
     if (gen != nullptr) {
       jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(gen));
     }
-  }
-
-  return ret;
+    return callback_result;
+  };
+  return CreateObjectArray(env, 2 * count, "java/lang/String", callback);
 }
 
 // Don't do anything
@@ -89,6 +87,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/912-classes/classes.cc b/test/912-classes/classes.cc
index 4bf329c..838a92a 100644
--- a/test/912-classes/classes.cc
+++ b/test/912-classes/classes.cc
@@ -23,6 +23,7 @@
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
 
+#include "ti-agent/common_helper.h"
 #include "ti-agent/common_load.h"
 
 namespace art {
@@ -40,21 +41,14 @@
     return nullptr;
   }
 
-  ScopedLocalRef<jclass> obj_class(env, env->FindClass("java/lang/String"));
-  if (obj_class.get() == nullptr) {
-    return nullptr;
-  }
-
-  jobjectArray ret = env->NewObjectArray(2, obj_class.get(), nullptr);
-  if (ret == nullptr) {
-    return ret;
-  }
-
-  ScopedLocalRef<jstring> sig_str(env, sig == nullptr ? nullptr : env->NewStringUTF(sig));
-  ScopedLocalRef<jstring> gen_str(env, gen == nullptr ? nullptr : env->NewStringUTF(gen));
-
-  env->SetObjectArrayElement(ret, 0, sig_str.get());
-  env->SetObjectArrayElement(ret, 1, gen_str.get());
+  auto callback = [&](jint i) {
+    if (i == 0) {
+      return sig == nullptr ? nullptr : env->NewStringUTF(sig);
+    } else {
+      return gen == nullptr ? nullptr : env->NewStringUTF(gen);
+    }
+  };
+  jobjectArray ret = CreateObjectArray(env, 2, "java/lang/String", callback);
 
   // Need to deallocate the strings.
   if (sig != nullptr) {
@@ -75,6 +69,7 @@
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   return 0;
 }
 
diff --git a/test/103-string-append/run b/test/913-heaps/build
similarity index 70%
copy from test/103-string-append/run
copy to test/913-heaps/build
index e27a622..898e2e5 100755
--- a/test/103-string-append/run
+++ b/test/913-heaps/build
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright (C) 2012 The Android Open Source Project
+# Copyright 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,5 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# As this is a performance test we always use the non-debug build.
-exec ${RUN} "${@/#libartd.so/libart.so}"
+./default-build "$@" --experimental agents
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
new file mode 100644
index 0000000..8002cfa
--- /dev/null
+++ b/test/913-heaps/expected.txt
@@ -0,0 +1,92 @@
+---
+true true
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 2@1000 [size=16, length=-1]
+root@root --(stack-local)--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 2@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 2@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 2@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
new file mode 100644
index 0000000..340671d
--- /dev/null
+++ b/test/913-heaps/heaps.cc
@@ -0,0 +1,469 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "heaps.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/stringprintf.h"
+#include "jit/jit.h"
+#include "jni.h"
+#include "native_stack_dump.h"
+#include "openjdkjvmti/jvmti.h"
+#include "runtime.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test913Heaps {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_forceGarbageCollection(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                   jclass klass ATTRIBUTE_UNUSED) {
+  jvmtiError ret = jvmti_env->ForceGarbageCollection();  // Uses the global jvmti_env.
+  if (ret != JVMTI_ERROR_NONE) {  // Failure is only printed; nothing is thrown or returned.
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);  // NOTE(review): err is never Deallocate()d here.
+    printf("Error forcing a garbage collection: %s\n", err);
+  }
+}
+
+class IterationConfig {
+ public:
+  IterationConfig() {}
+  virtual ~IterationConfig() {}
+
+  virtual jint Handle(jvmtiHeapReferenceKind reference_kind,
+                      const jvmtiHeapReferenceInfo* reference_info,
+                      jlong class_tag,
+                      jlong referrer_class_tag,
+                      jlong size,
+                      jlong* tag_ptr,
+                      jlong* referrer_tag_ptr,
+                      jint length,
+                      void* user_data) = 0;
+};
+
+static jint JNICALL HeapReferenceCallback(jvmtiHeapReferenceKind reference_kind,
+                                          const jvmtiHeapReferenceInfo* reference_info,
+                                          jlong class_tag,
+                                          jlong referrer_class_tag,
+                                          jlong size,
+                                          jlong* tag_ptr,
+                                          jlong* referrer_tag_ptr,
+                                          jint length,
+                                          void* user_data) {
+  IterationConfig* config = reinterpret_cast<IterationConfig*>(user_data);  // Set by Run().
+  return config->Handle(reference_kind,  // Forward every argument unchanged.
+                        reference_info,
+                        class_tag,
+                        referrer_class_tag,
+                        size,
+                        tag_ptr,
+                        referrer_tag_ptr,
+                        length,
+                        user_data);
+}
+
+static bool Run(jint heap_filter,
+                jclass klass_filter,
+                jobject initial_object,
+                IterationConfig* config) {  // config->Handle() sees each reference.
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));  // All other callbacks stay null.
+  callbacks.heap_reference_callback = HeapReferenceCallback;
+
+  jvmtiError ret = jvmti_env->FollowReferences(heap_filter,
+                                               klass_filter,
+                                               initial_object,
+                                               &callbacks,
+                                               config);  // Handed back as user_data.
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);  // NOTE(review): err is never Deallocate()d here.
+    printf("Failure running FollowReferences: %s\n", err);
+    return false;
+  }
+  return true;  // True only when FollowReferences reported JVMTI_ERROR_NONE.
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_followReferences(JNIEnv* env,
+                                                                     jclass klass ATTRIBUTE_UNUSED,
+                                                                     jint heap_filter,
+                                                                     jclass klass_filter,
+                                                                     jobject initial_object,
+                                                                     jint stop_after,
+                                                                     jint follow_set,
+                                                                     jobject jniRef) {
+  class PrintIterationConfig FINAL : public IterationConfig {
+   public:
+    PrintIterationConfig(jint _stop_after, jint _follow_set)
+        : counter_(0),
+          stop_after_(_stop_after),
+          follow_set_(_follow_set) {
+    }
+
+    jint Handle(jvmtiHeapReferenceKind reference_kind,
+                const jvmtiHeapReferenceInfo* reference_info,
+                jlong class_tag,
+                jlong referrer_class_tag,
+                jlong size,
+                jlong* tag_ptr,
+                jlong* referrer_tag_ptr,
+                jint length,
+                void* user_data ATTRIBUTE_UNUSED) OVERRIDE {
+      jlong tag = *tag_ptr;
+      // Only check tagged objects.
+      if (tag == 0) {
+        return JVMTI_VISIT_OBJECTS;
+      }
+
+      Print(reference_kind,
+            reference_info,
+            class_tag,
+            referrer_class_tag,
+            size,
+            tag_ptr,
+            referrer_tag_ptr,
+            length);
+
+      counter_++;
+      if (counter_ == stop_after_) {
+        return JVMTI_VISIT_ABORT;
+      }
+
+      if (tag > 0 && tag < 32) {
+        bool should_visit_references = (follow_set_ & (1 << static_cast<int32_t>(tag))) != 0;
+        return should_visit_references ? JVMTI_VISIT_OBJECTS : 0;
+      }
+
+      return JVMTI_VISIT_OBJECTS;
+    }
+
+    void Print(jvmtiHeapReferenceKind reference_kind,
+               const jvmtiHeapReferenceInfo* reference_info,
+               jlong class_tag,
+               jlong referrer_class_tag,
+               jlong size,
+               jlong* tag_ptr,
+               jlong* referrer_tag_ptr,
+               jint length) {
+      std::string referrer_str;
+      if (referrer_tag_ptr == nullptr) {
+        referrer_str = "root@root";
+      } else {
+        referrer_str = StringPrintf("%" PRId64 "@%" PRId64, *referrer_tag_ptr, referrer_class_tag);
+      }
+
+      jlong adapted_size = size;
+      if (*tag_ptr >= 1000) {
+        // This is a class or interface, the size of which will be dependent on the architecture.
+        // Do not print the size, but detect known values and "normalize" for the golden file.
+        if ((sizeof(void*) == 4 && size == 180) || (sizeof(void*) == 8 && size == 232)) {
+          adapted_size = 123;
+        }
+      }
+
+      std::string referree_str = StringPrintf("%" PRId64 "@%" PRId64, *tag_ptr, class_tag);
+
+      lines_.push_back(CreateElem(referrer_str,
+                                  referree_str,
+                                  reference_kind,
+                                  reference_info,
+                                  adapted_size,
+                                  length));
+
+      if (reference_kind == JVMTI_HEAP_REFERENCE_THREAD && *tag_ptr == 1000) {
+        DumpStacks();
+      }
+    }
+
+    std::vector<std::string> GetLines() const {
+      std::vector<std::string> ret;
+      for (const std::unique_ptr<Elem>& e : lines_) {
+        ret.push_back(e->Print());
+      }
+      return ret;
+    }
+
+   private:
+    // We need to postpone some printing, as required functions are not callback-safe.
+    class Elem {
+     public:
+      Elem(const std::string& referrer, const std::string& referree, jlong size, jint length)
+          : referrer_(referrer), referree_(referree), size_(size), length_(length) {}
+      virtual ~Elem() {}
+
+      std::string Print() const {
+        return StringPrintf("%s --(%s)--> %s [size=%" PRId64 ", length=%d]",
+                            referrer_.c_str(),
+                            PrintArrowType().c_str(),
+                            referree_.c_str(),
+                            size_,
+                            length_);
+      }
+
+     protected:
+      virtual std::string PrintArrowType() const = 0;
+
+     private:
+      std::string referrer_;
+      std::string referree_;
+      jlong size_;
+      jint length_;
+    };
+
+    class JNILocalElement : public Elem {  // Arrow label for JNI local references.
+     public:
+      JNILocalElement(const std::string& referrer,
+                      const std::string& referree,
+                      jlong size,
+                      jint length,
+                      const jvmtiHeapReferenceInfo* reference_info)
+          : Elem(referrer, referree, size, length) {
+        memcpy(&info_, reference_info, sizeof(jvmtiHeapReferenceInfo));
+      }
+
+     protected:
+      std::string PrintArrowType() const OVERRIDE {
+        char* name = nullptr;  // Stays null when there is no method to look up.
+        if (info_.jni_local.method != nullptr) {
+          jvmti_env->GetMethodName(info_.jni_local.method, &name, nullptr, nullptr);
+        }
+        std::string ret = StringPrintf("jni-local[id=%" PRId64 ",tag=%" PRId64 ",depth=%d,"
+                                       "method=%s]",
+                                       info_.jni_local.thread_id,
+                                       info_.jni_local.thread_tag,
+                                       info_.jni_local.depth,
+                                       name == nullptr ? "<null>" : name);
+        if (name != nullptr) {  // Release the name string obtained from GetMethodName.
+          jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
+        }
+
+        return ret;
+      }
+
+     private:
+      // Private copy of the callback's info; the original pointer is not retained.
+      jvmtiHeapReferenceInfo info_;
+    };
+
+    // For simple or unimplemented cases.
+    class StringElement : public Elem {
+     public:
+      StringElement(const std::string& referrer,
+                   const std::string& referree,
+                   jlong size,
+                   jint length,
+                   const std::string& string)
+          : Elem(referrer, referree, size, length), string_(string) {}
+
+     protected:
+      std::string PrintArrowType() const OVERRIDE {
+        return string_;
+      }
+
+     private:
+      const std::string string_;
+    };
+
+    static std::unique_ptr<Elem> CreateElem(const std::string& referrer,
+                                            const std::string& referree,
+                                            jvmtiHeapReferenceKind reference_kind,
+                                            const jvmtiHeapReferenceInfo* reference_info,
+                                            jlong size,
+                                            jint length) {
+      switch (reference_kind) {
+        case JVMTI_HEAP_REFERENCE_CLASS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "class"));
+        case JVMTI_HEAP_REFERENCE_FIELD: {
+          std::string tmp = StringPrintf("field@%d", reference_info->field.index);
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                        referree,
+                                                        size,
+                                                        length,
+                                                        tmp));
+        }
+        case JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT: {
+          std::string tmp = StringPrintf("array-element@%d", reference_info->array.index);
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         tmp));
+        }
+        case JVMTI_HEAP_REFERENCE_CLASS_LOADER:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "classloader"));
+        case JVMTI_HEAP_REFERENCE_SIGNERS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "signers"));
+        case JVMTI_HEAP_REFERENCE_PROTECTION_DOMAIN:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "protection-domain"));
+        case JVMTI_HEAP_REFERENCE_INTERFACE:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "interface"));
+        case JVMTI_HEAP_REFERENCE_STATIC_FIELD: {  // Described by the union's field member.
+          std::string tmp = StringPrintf("static-field@%d", reference_info->field.index);
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         tmp));
+        }
+        case JVMTI_HEAP_REFERENCE_CONSTANT_POOL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "constant-pool"));
+        case JVMTI_HEAP_REFERENCE_SUPERCLASS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "superclass"));
+        case JVMTI_HEAP_REFERENCE_JNI_GLOBAL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "jni-global"));
+        case JVMTI_HEAP_REFERENCE_SYSTEM_CLASS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "system-class"));
+        case JVMTI_HEAP_REFERENCE_MONITOR:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "monitor"));
+        case JVMTI_HEAP_REFERENCE_STACK_LOCAL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "stack-local"));
+        case JVMTI_HEAP_REFERENCE_JNI_LOCAL:
+          return std::unique_ptr<Elem>(new JNILocalElement(referrer,
+                                                           referree,
+                                                           size,
+                                                           length,
+                                                           reference_info));
+        case JVMTI_HEAP_REFERENCE_THREAD:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "thread"));
+        case JVMTI_HEAP_REFERENCE_OTHER:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "other"));
+      }
+      LOG(FATAL) << "Unknown kind";
+      UNREACHABLE();
+    }
+
+    static void DumpStacks() NO_THREAD_SAFETY_ANALYSIS {
+      auto dump_function = [](art::Thread* t, void* data ATTRIBUTE_UNUSED) {
+        std::string name;
+        t->GetThreadName(name);
+        LOG(ERROR) << name;
+        art::DumpNativeStack(LOG_STREAM(ERROR), t->GetTid());
+      };
+      art::Runtime::Current()->GetThreadList()->ForEach(dump_function, nullptr);
+    }
+
+    jint counter_;
+    const jint stop_after_;
+    const jint follow_set_;
+
+    std::vector<std::unique_ptr<Elem>> lines_;
+  };
+
+  jit::ScopedJitSuspend sjs;  // Wait to avoid JIT influence (e.g., JNI globals).
+
+  // If jniRef isn't null, add a local and a global ref.
+  ScopedLocalRef<jobject> jni_local_ref(env, nullptr);
+  jobject jni_global_ref = nullptr;
+  if (jniRef != nullptr) {
+    jni_local_ref.reset(env->NewLocalRef(jniRef));
+    jni_global_ref = env->NewGlobalRef(jniRef);
+  }
+
+  PrintIterationConfig config(stop_after, follow_set);
+  Run(heap_filter, klass_filter, initial_object, &config);
+
+  std::vector<std::string> lines = config.GetLines();
+  jobjectArray ret = CreateObjectArray(env,
+                                       static_cast<jint>(lines.size()),
+                                       "java/lang/String",
+                                       [&](jint i) {
+                                         return env->NewStringUTF(lines[i].c_str());
+                                       });
+
+  if (jni_global_ref != nullptr) {
+    env->DeleteGlobalRef(jni_global_ref);
+  }
+
+  return ret;
+}
+
+// Obtains the jvmti environment and enables capabilities via SetAllCapabilities.
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test913Heaps
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/913-heaps/heaps.h
similarity index 68%
copy from test/562-no-intermediate/src/Main.java
copy to test/913-heaps/heaps.h
index 3b74d6f..bd828ac 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/913-heaps/heaps.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_913_HEAPS_HEAPS_H_
+#define ART_TEST_913_HEAPS_HEAPS_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test913Heaps {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test913Heaps
+}  // namespace art
+
+#endif  // ART_TEST_913_HEAPS_HEAPS_H_
diff --git a/test/913-heaps/info.txt b/test/913-heaps/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/913-heaps/info.txt
@@ -0,0 +1 @@
+Tests heap-related functions in the jvmti plugin: GC callbacks, object tagging, FollowReferences.
diff --git a/test/913-heaps/run b/test/913-heaps/run
new file mode 100755
index 0000000..7bd8cbd
--- /dev/null
+++ b/test/913-heaps/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=913-heaps,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/913-heaps/src/Main.java b/test/913-heaps/src/Main.java
new file mode 100644
index 0000000..a6ace9a
--- /dev/null
+++ b/test/913-heaps/src/Main.java
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+    doFollowReferencesTest();
+  }
+
+  public static void doTest() throws Exception {
+    setupGcCallback();
+
+    enableGcTracking(true);
+    run();
+    enableGcTracking(false);
+  }
+
+  private static void run() {
+    clearStats();
+    forceGarbageCollection();
+    printStats();
+  }
+
+  private static void clearStats() {
+    getGcStarts();
+    getGcFinishes();
+  }
+
+  private static void printStats() {
+    System.out.println("---");
+    int s = getGcStarts();
+    int f = getGcFinishes();
+    System.out.println((s > 0) + " " + (f > 0));
+  }
+
+  public static void doFollowReferencesTest() throws Exception {
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    setTag(Thread.currentThread(), 3000);
+
+    {
+      ArrayList<Object> tmpStorage = new ArrayList<>();
+      doFollowReferencesTestNonRoot(tmpStorage);
+      tmpStorage = null;
+    }
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doFollowReferencesTestRoot();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+  }
+
+  private static void doFollowReferencesTestNonRoot(ArrayList<Object> tmpStorage) {
+    Verifier v = new Verifier();
+    tagClasses(v);
+    A a = createTree(v);
+    tmpStorage.add(a);
+    v.add("0@0", "1@1000");  // tmpStorage[0] --(array-element)--> a.
+
+    doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, null, v, null);
+    doFollowReferencesTestImpl(a.foo, Integer.MAX_VALUE, -1, null, v, "2@1000");
+
+    tmpStorage.clear();
+  }
+
+  private static void doFollowReferencesTestRoot() {
+    Verifier v = new Verifier();
+    tagClasses(v);
+    A a = createTree(v);
+
+    doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, a, v, null);
+    doFollowReferencesTestImpl(a.foo, Integer.MAX_VALUE, -1, a, v, "2@1000");
+  }
+
+  private static void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
+      Object asRoot, Verifier v, String additionalEnabled) {
+    String[] lines =
+        followReferences(0, null, root, stopAfter, followSet, asRoot);
+
+    v.process(lines, additionalEnabled);
+
+    // TODO: Test filters.
+  }
+
+  private static void tagClasses(Verifier v) {
+    setTag(A.class, 1000);
+
+    setTag(B.class, 1001);
+    v.add("1001@0", "1000@0");  // B.class --(superclass)--> A.class.
+
+    setTag(C.class, 1002);
+    v.add("1002@0", "1001@0");  // C.class --(superclass)--> B.class.
+    v.add("1002@0", "2001@0");  // C.class --(interface)--> I2.class.
+
+    setTag(I1.class, 2000);
+
+    setTag(I2.class, 2001);
+    v.add("2001@0", "2000@0");  // I2.class --(interface)--> I1.class.
+  }
+
+  // Builds the tagged test graph rooted at the returned A and records each expected edge in v.
+  // Node strings are "<object tag>@<class tag>"; class tags are those assigned in tagClasses.
+  private static A createTree(Verifier v) {
+    A aInst = new A();
+    setTag(aInst, 1);
+    String aInstStr = "1@1000";
+    String aClassStr = "1000@0";
+    v.add(aInstStr, aClassStr);  // A --(class)--> A.class.
+
+    A a2Inst = new A();
+    setTag(a2Inst, 2);
+    aInst.foo = a2Inst;
+    String a2InstStr = "2@1000";
+    v.add(a2InstStr, aClassStr);  // A2 --(class)--> A.class.
+    v.add(aInstStr, a2InstStr);   // A --(field)--> A2.
+
+    B bInst = new B();
+    setTag(bInst, 3);
+    aInst.foo2 = bInst;
+    String bInstStr = "3@1001";
+    v.add(bInstStr, "1001@0");   // B --(class)--> B.class.
+    v.add(aInstStr, bInstStr);   // A --(field)--> B.
+
+    A a3Inst = new A();
+    setTag(a3Inst, 4);
+    bInst.bar = a3Inst;
+    String a3InstStr = "4@1000";
+    v.add(a3InstStr, aClassStr);  // A3 --(class)--> A.class.
+    v.add(bInstStr, a3InstStr);   // B --(field)--> A3.
+
+    C cInst = new C();
+    setTag(cInst, 5);
+    bInst.bar2 = cInst;
+    String cInstStr = "5@1002";  // cInst is a C, and C.class carries tag 1002 (not A's 1000).
+    v.add(cInstStr, "1002@0");   // C --(class)--> C.class.
+    v.add(bInstStr, cInstStr);   // B --(field)--> C.
+
+    A a4Inst = new A();
+    setTag(a4Inst, 6);
+    cInst.baz = a4Inst;
+    String a4InstStr = "6@1000";
+    v.add(a4InstStr, aClassStr);  // A4 --(class)--> A.class.
+    v.add(cInstStr, a4InstStr);   // C --(field)--> A4.
+
+    cInst.baz2 = aInst;
+    v.add(cInstStr, aInstStr);  // C --(field)--> A (closes the cycle back to the root).
+
+    return aInst;
+  }
+
+  public static class A {
+    public A foo;
+    public A foo2;
+
+    public A() {}
+    public A(A a, A b) {
+      foo = a;
+      foo2 = b;
+    }
+  }
+
+  public static class B extends A {
+    public A bar;
+    public A bar2;
+
+    public B() {}
+    public B(A a, A b) {
+      bar = a;
+      bar2 = b;
+    }
+  }
+
+  public static interface I1 {
+    public final static int i1Field = 1;
+  }
+
+  public static interface I2 extends I1 {
+    public final static int i2Field = 2;
+  }
+
+  public static class C extends B implements I2 {
+    public A baz;
+    public A baz2;
+
+    public C() {}
+    public C(A a, A b) {
+      baz = a;
+      baz2 = b;
+    }
+  }
+
+  public static class Verifier {
+    public static class Node {
+      public String referrer;
+
+      public HashSet<String> referrees = new HashSet<>();
+
+      public Node(String r) {
+        referrer = r;
+      }
+
+      public boolean isRoot() {
+        return referrer.startsWith("root@");
+      }
+    }
+
+    HashMap<String, Node> nodes = new HashMap<>();
+
+    public Verifier() {
+    }
+
+    public void add(String referrer, String referree) {
+      if (!nodes.containsKey(referrer)) {
+        nodes.put(referrer, new Node(referrer));
+      }
+      if (referree != null) {
+        nodes.get(referrer).referrees.add(referree);
+      }
+    }
+
+    public void process(String[] lines, String additionalEnabledReferrer) {
+      // This method isn't optimal. The loops could be merged. However, it's more readable if
+      // the different parts are separated.
+
+      ArrayList<String> rootLines = new ArrayList<>();
+      ArrayList<String> nonRootLines = new ArrayList<>();
+
+      // Check for consecutive chunks of referrers. Also ensure roots come first.
+      {
+        String currentHead = null;
+        boolean rootsDone = false;
+        HashSet<String> completedReferrers = new HashSet<>();
+        for (String l : lines) {
+          String referrer = getReferrer(l);
+
+          if (isRoot(referrer)) {
+            if (rootsDone) {
+              System.out.println("ERROR: Late root " + l);
+              print(lines);
+              return;
+            }
+            rootLines.add(l);
+            continue;
+          }
+
+          rootsDone = true;
+
+          if (currentHead == null) {
+            currentHead = referrer;
+          } else {
+            if (!currentHead.equals(referrer)) {
+              completedReferrers.add(currentHead);
+              currentHead = referrer;
+              if (completedReferrers.contains(referrer)) {
+                System.out.println("Non-contiguous referrer " + l);
+                print(lines);
+                return;
+              }
+            }
+          }
+          nonRootLines.add(l);
+        }
+      }
+
+      // Sort (root order is not specified) and print the roots.
+      // TODO: What about extra roots? JNI and the interpreter seem to introduce those (though it
+      //       isn't clear why a debuggable-AoT test doesn't have the same, at least for locals).
+      //       For now, swallow duplicates, and resolve once we have the metadata for the roots.
+      {
+        Collections.sort(rootLines);
+        String lastRoot = null;
+        for (String l : rootLines) {
+          if (lastRoot != null && lastRoot.equals(l)) {
+            continue;
+          }
+          lastRoot = l;
+          System.out.println(l);
+        }
+      }
+
+      // Iterate through the lines, keeping track of which referrers are visited, to ensure the
+      // order is acceptable.
+      HashSet<String> enabled = new HashSet<>();
+      if (additionalEnabledReferrer != null) {
+        enabled.add(additionalEnabledReferrer);
+      }
+      // Always add "0@0".
+      enabled.add("0@0");
+
+      for (String l : lines) {
+        String referrer = getReferrer(l);
+        String referree = getReferree(l);
+        if (isRoot(referrer)) {
+          // For a root src, just enable the referree.
+          enabled.add(referree);
+        } else {
+          // Check that the referrer is enabled (may be visited).
+          if (!enabled.contains(referrer)) {
+            System.out.println("Referrer " + referrer + " not enabled: " + l);
+            print(lines);
+            return;
+          }
+          enabled.add(referree);
+        }
+      }
+
+      // Now just sort the non-root lines and output them
+      Collections.sort(nonRootLines);
+      for (String l : nonRootLines) {
+        System.out.println(l);
+      }
+
+      System.out.println("---");
+    }
+
+    public static boolean isRoot(String ref) {
+      return ref.startsWith("root@");
+    }
+
+    // Extracts the referrer: the text preceding the first " --" in the line.
+    // Rejects lines with no " --", an empty referrer, or a space inside the referrer.
+    private static String getReferrer(String line) {
+      final int arrowStart = line.indexOf(" --");
+      final int firstSpace = line.indexOf(' ');
+      final boolean wellFormed = arrowStart > 0 && arrowStart == firstSpace;
+      if (!wellFormed) {
+        throw new IllegalArgumentException(line);
+      }
+      return line.substring(0, arrowStart);
+    }
+
+    // Extracts the referree: the token between "--> " and the next space; throws if either is absent.
+    private static String getReferree(String line) {
+      final String arrow = "--> ";
+      final int arrowPos = line.indexOf(arrow);
+      final int start = arrowPos + arrow.length();
+      final int end = (arrowPos > 0) ? line.indexOf(' ', start) : -1;
+      if (end < 0) {
+        throw new IllegalArgumentException(line);
+      }
+      return line.substring(start, end);
+    }
+
+    private static void print(String[] lines) {
+      for (String l : lines) {
+        System.out.println(l);
+      }
+    }
+  }
+
+  private static native void setupGcCallback();
+  private static native void enableGcTracking(boolean enable);
+  private static native int getGcStarts();
+  private static native int getGcFinishes();
+  private static native void forceGarbageCollection();
+
+  private static native void setTag(Object o, long tag);
+  private static native long getTag(Object o);
+
+  private static native String[] followReferences(int heapFilter, Class<?> klassFilter,
+      Object initialObject, int stopAfter, int followSet, Object jniRef);
+}
diff --git a/test/979-invoke-polymorphic-accessors/build b/test/954-invoke-polymorphic-verifier/build
old mode 100644
new mode 100755
similarity index 100%
copy from test/979-invoke-polymorphic-accessors/build
copy to test/954-invoke-polymorphic-verifier/build
diff --git a/test/103-string-append/run b/test/954-invoke-polymorphic-verifier/check
similarity index 68%
copy from test/103-string-append/run
copy to test/954-invoke-polymorphic-verifier/check
index e27a622..dc5ddb7 100755
--- a/test/103-string-append/run
+++ b/test/954-invoke-polymorphic-verifier/check
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2012 The Android Open Source Project
+# Copyright (C) 2014 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# As this is a performance test we always use the non-debug build.
-exec ${RUN} "${@/#libartd.so/libart.so}"
+# Strip out temporary file path information and indices from output.
+sed -e "s/ [(]declaration of.*//" -e "s/\[0x[0-9A-F]*\] //g" "$2" > "$2.tmp"
+diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
diff --git a/test/954-invoke-polymorphic-verifier/expected.txt b/test/954-invoke-polymorphic-verifier/expected.txt
new file mode 100644
index 0000000..5df393a
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/expected.txt
@@ -0,0 +1,10 @@
+java.lang.VerifyError: Verifier rejected class MethodHandleNotInvoke: void MethodHandleNotInvoke.<init>() failed to verify: void MethodHandleNotInvoke.<init>(): void MethodHandleNotInvoke.<init>(): couldn't find method java.lang.invoke.MethodHandle.notInvoke ([Ljava/lang/Object;)Ljava/lang/Object;
+java.lang.VerifyError: Verifier rejected class MethodHandleToString: void MethodHandleToString.<init>() failed to verify: void MethodHandleToString.<init>(): void MethodHandleToString.<init>(): invoke type (METHOD_POLYMORPHIC) does not match method type of java.lang.String java.lang.invoke.MethodHandle.toString()
+java.lang.VerifyError: Verifier rejected class NonReference: void NonReference.<init>() failed to verify: void NonReference.<init>(): void NonReference.<init>(): tried to get class from non-reference register v0 (type=Precise Low-half Constant: 0)
+java.lang.VerifyError: Verifier rejected class TooFewArguments: void TooFewArguments.<init>() failed to verify: void TooFewArguments.<init>(): void TooFewArguments.<init>(): Rejecting invocation, expected 2 argument registers, method signature has 3 or more
+java.lang.VerifyError: Verifier rejected class TooManyArguments: void TooManyArguments.<init>() failed to verify: void TooManyArguments.<init>(): void TooManyArguments.<init>(): Rejecting invocation, expected 4 argument registers, method signature has 3
+java.lang.VerifyError: Verifier rejected class BadThis: void BadThis.<init>() failed to verify: void BadThis.<init>(): void BadThis.<init>(): 'this' argument 'Precise Reference: java.lang.String' not instance of 'Reference: java.lang.invoke.MethodHandle'
+java.lang.VerifyError: Verifier rejected class FakeSignaturePolymorphic: void FakeSignaturePolymorphic.<init>() failed to verify: void FakeSignaturePolymorphic.<init>(): void FakeSignaturePolymorphic.<init>(): invoke type (METHOD_POLYMORPHIC) does not match method type of java.lang.Object Main.invoke(java.lang.Object[])
+java.lang.VerifyError: Verifier rejected class BetterFakeSignaturePolymorphic: void BetterFakeSignaturePolymorphic.<init>() failed to verify: void BetterFakeSignaturePolymorphic.<init>(): Signature polymorphic method must be declared in java.lang.invoke.MethodClass
+Passed Subclass test
+java.lang.VerifyError: Verifier rejected class Unresolved: void Unresolved.<init>() failed to verify: void Unresolved.<init>(): invoke-polymorphic receiver has no class: Unresolved Reference: other.thing.Foo
diff --git a/test/954-invoke-polymorphic-verifier/info.txt b/test/954-invoke-polymorphic-verifier/info.txt
new file mode 100644
index 0000000..cb10d42
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/info.txt
@@ -0,0 +1,3 @@
+Test cases that should be rejected by the method verifier.
+
+NOTE: needs to run under ART.
diff --git a/test/979-invoke-polymorphic-accessors/run b/test/954-invoke-polymorphic-verifier/run
old mode 100644
new mode 100755
similarity index 100%
copy from test/979-invoke-polymorphic-accessors/run
copy to test/954-invoke-polymorphic-verifier/run
diff --git a/test/954-invoke-polymorphic-verifier/smali/BadThis.smali b/test/954-invoke-polymorphic-verifier/smali/BadThis.smali
new file mode 100644
index 0000000..d9edf67
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/BadThis.smali
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "BadThis.smali"
+
+.class public LBadThis;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  const-string v0, "0"
+  const-string v1, "1"
+  const-string v2, "2"
+  # v0 is a String, not a MethodHandle.
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/BetterFakeSignaturePolymorphic.smali b/test/954-invoke-polymorphic-verifier/smali/BetterFakeSignaturePolymorphic.smali
new file mode 100644
index 0000000..631e704
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/BetterFakeSignaturePolymorphic.smali
@@ -0,0 +1,43 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "BetterFakeSignaturePolymorphic.smali"
+
+.class public LBetterFakeSignaturePolymorphic;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LBetterFakeSignaturePolymorphic;->getMain()LMain;
+  move-result-object v0
+  const/4 v1, 0
+  move-object v1, v1
+  # Fail here because Main;->invokeExact is on wrong class.
+  invoke-polymorphic {v0, v1}, LMain;->invokeExact([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/Object;)Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
+
+.method public static getMain()LMain;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/FakeSignaturePolymorphic.smali b/test/954-invoke-polymorphic-verifier/smali/FakeSignaturePolymorphic.smali
new file mode 100644
index 0000000..5bd054a
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/FakeSignaturePolymorphic.smali
@@ -0,0 +1,43 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "FakeSignaturePolymorphic.smali"
+
+.class public LFakeSignaturePolymorphic;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LFakeSignaturePolymorphic;->getMain()LMain;
+  move-result-object v0
+  const/4 v1, 0
+  move-object v1, v1
+  # Fail here because Main;->invoke does not have right flags (ie not native or varargs).
+  invoke-polymorphic {v0, v1}, LMain;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/Object;)Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
+
+.method public static getMain()LMain;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/Main.smali b/test/954-invoke-polymorphic-verifier/smali/Main.smali
new file mode 100644
index 0000000..5b5e555
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/Main.smali
@@ -0,0 +1,85 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the test suite runner. It is written in smali rather than
+# Java pending support in dx/dxmerge for invoke-polymorphic (b/33191712).
+
+.source "Main.smali"
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public constructor<init>()V
+.registers 1
+  invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+.registers 1
+  # New tests should be added here.
+  const-string v0, "MethodHandleNotInvoke"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "MethodHandleToString"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "NonReference"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "TooFewArguments"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "TooManyArguments"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "BadThis"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "FakeSignaturePolymorphic"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "BetterFakeSignaturePolymorphic"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "Subclass"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "Unresolved"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  return-void
+.end method
+
+.method public static test(Ljava/lang/String;)V
+.registers 6
+ :try_start_1
+  invoke-static {v5}, Ljava/lang/Class;->forName(Ljava/lang/String;)Ljava/lang/Class;
+  move-result-object v0
+  invoke-virtual {v0}, Ljava/lang/Class;->newInstance()Ljava/lang/Object;
+ :try_end_1
+  .catch Ljava/lang/VerifyError; {:try_start_1 .. :try_end_1} :catch_verifier
+  return-void
+ :catch_verifier
+  move-exception v3
+  invoke-virtual {v3}, Ljava/lang/Exception;->toString()Ljava/lang/String;
+  move-result-object v3
+  sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  invoke-virtual {v2, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+  return-void
+.end method
+
+# A test method called "invoke", but on a class other than MethodHandle.
+.method public invoke([Ljava/lang/Object;)Ljava/lang/Object;
+.registers 2
+  const/4 v0, 0
+  aget-object v0, p0, v0
+  return-object v0
+.end method
+
+# A test method called "invokeExact" that is native varargs, but is on a class
+# other than MethodHandle.
+.method public native varargs invokeExact([Ljava/lang/Object;)Ljava/lang/Object;
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/MethodHandleNotInvoke.smali b/test/954-invoke-polymorphic-verifier/smali/MethodHandleNotInvoke.smali
new file mode 100644
index 0000000..42546d1
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/MethodHandleNotInvoke.smali
@@ -0,0 +1,37 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "MethodHandleNotInvoke.smali"
+
+.class public LMethodHandleNotInvoke;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LMethodHandleNotInvoke;->getMethodHandle()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0
+  const/4 v1, 0
+  move-object v1, v1
+  # Attempt invoke-polymorphic on MethodHandle.notInvoke().
+  invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->notInvoke([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/Object;)Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/MethodHandleToString.smali b/test/954-invoke-polymorphic-verifier/smali/MethodHandleToString.smali
new file mode 100644
index 0000000..c48429c
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/MethodHandleToString.smali
@@ -0,0 +1,35 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "MethodHandleToString.smali"
+
+.class public LMethodHandleToString;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LMethodHandleToString;->getMethodHandle()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0
+  # Attempt invoke-polymorphic on MethodHandle.toString().
+  invoke-polymorphic {v0}, Ljava/lang/invoke/MethodHandle;->toString()Ljava/lang/String;, ()Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/NonReference.smali b/test/954-invoke-polymorphic-verifier/smali/NonReference.smali
new file mode 100644
index 0000000..4e1eff2
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/NonReference.smali
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "NonReference.smali"
+
+.class public LNonReference;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Set v0 to have incorrect type (not a MethodHandle) and value (not null).
+  const-wide v0, 0
+  const-string v1, "1"
+  const-string v2, "2"
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/Subclass.smali b/test/954-invoke-polymorphic-verifier/smali/Subclass.smali
new file mode 100644
index 0000000..7ef61be
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/Subclass.smali
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "Subclass.smali"
+
+.class public LSubclass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 3
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  goto :happy  # Jump over the invokes below, so they are never executed.
+  # Get a MethodHandleImpl instance (subclass of MethodHandle).
+  invoke-static {}, LSubclass;->getMethodHandleSubclassInstance()Ljava/lang/invoke/MethodHandleImpl;
+  move-result-object v0
+  const-string v1, "1"
+  const-string v2, "2"  # With .registers 3, v2 is the same register as p0; p0 is not read again.
+  # Calling MethodHandle.invoke() on MethodHandleImpl instance (subclass of MethodHandle) => Okay
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  # Calling MethodHandleImpl.invoke() rather than MethodHandle.invoke() [ declaring class is okay ] => Okay
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandleImpl;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+:happy
+  sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  const-string v2, "Passed Subclass test"
+  invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+  return-void
+.end method
+
+.method public static getMethodHandleSubclassInstance()Ljava/lang/invoke/MethodHandleImpl;
+.registers 1
+  const/4 v0, 0  # The zero constant serves as the null reference.
+  return-object v0  # Always returns null, typed as MethodHandleImpl by the method signature.
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/TooFewArguments.smali b/test/954-invoke-polymorphic-verifier/smali/TooFewArguments.smali
new file mode 100644
index 0000000..da29c6f
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/TooFewArguments.smali
@@ -0,0 +1,33 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "TooFewArguments.smali"
+
+.class public LTooFewArguments;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Set up v0 as a null MethodHandle
+  const/4 v0, 0
+  move-object v0, v0  # Self-move: v0 still holds the null constant.
+  invoke-virtual {v0}, Ljava/lang/invoke/MethodHandle;->asFixedArity()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0  # v0 now holds the result, declared as MethodHandle by asFixedArity().
+  const-string v1, "1"
+  # Invoke with one argument too few for prototype: (String, String)String is declared, only v1 is passed.
+  invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/TooManyArguments.smali b/test/954-invoke-polymorphic-verifier/smali/TooManyArguments.smali
new file mode 100644
index 0000000..bc0135e
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/TooManyArguments.smali
@@ -0,0 +1,35 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "TooManyArguments.smali"
+
+.class public LTooManyArguments;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Set up v0 as a null MethodHandle
+  const/4 v0, 0
+  move-object v0, v0  # Self-move: v0 still holds the null constant.
+  invoke-virtual {v0}, Ljava/lang/invoke/MethodHandle;->asFixedArity()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0  # v0 now holds the result, declared as MethodHandle by asFixedArity().
+  const-string v1, "1"
+  const-string v2, "2"
+  const-string v3, "3"  # With .registers 4, v3 is the same register as p0; p0 is not read again.
+  # Invoke with one argument too many for prototype: (String, String)String is declared, v1..v3 are passed.
+  invoke-polymorphic {v0, v1, v2, v3}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/Unresolved.smali b/test/954-invoke-polymorphic-verifier/smali/Unresolved.smali
new file mode 100644
index 0000000..882f0e9
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/Unresolved.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "Unresolved.smali"
+
+.class public LUnresolved;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 3
+.line 23
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Get a value whose declared type is other/thing/Foo (presumably a class that cannot be resolved - confirm).
+  invoke-static {}, LAbstract;->getUnresolvedInstance()Lother/thing/Foo;  # NOTE(review): targets LAbstract;, but this method is declared in LUnresolved; - confirm intent.
+  move-result-object v0
+  const-string v1, "1"
+  const-string v2, "2"  # With .registers 3, v2 is the same register as p0; p0 is not read again.
+  # Calling MethodHandle.invoke() on unresolved receiver.
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
+
+.method public static getUnresolvedInstance()Lother/thing/Foo;
+.registers 1
+.line 37
+  const/4 v0, 0  # The zero constant serves as the null reference.
+  return-object v0  # Always returns null, typed as other/thing/Foo by the method signature.
+.end method
diff --git a/test/955-methodhandles-smali/expected.txt b/test/955-methodhandles-smali/expected.txt
index 047a287..5de1274 100644
--- a/test/955-methodhandles-smali/expected.txt
+++ b/test/955-methodhandles-smali/expected.txt
@@ -5,4 +5,5 @@
 40
 43
 44
-0-11
+0
+-1
diff --git a/test/955-methodhandles-smali/smali/Main.smali b/test/955-methodhandles-smali/smali/Main.smali
index 9681d56..52460a8 100644
--- a/test/955-methodhandles-smali/smali/Main.smali
+++ b/test/955-methodhandles-smali/smali/Main.smali
@@ -220,24 +220,22 @@
     invoke-polymorphic {v0, v1, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;Ljava/lang/Long;)I
     move-result v3
     sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(I)V
 
     # Call compareTo(long) - this is an implicit box.
     const-wide v2, 44
     invoke-polymorphic {v0, v1, v2, v3}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;J)I
     move-result v3
     sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(I)V
 
     # Call compareTo(int) - this is an implicit box.
-    const v2, 40
-    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;I)I
-    move-result v3
-    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
-    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
-
-    # Add a newline at the end of file.
-    invoke-virtual {v4}, Ljava/io/PrintStream;->println()V
+# This throws WrongMethodTypeException as it's a two step conversion int->long->Long or int->Integer->Long.
+#    const v2, 40
+#    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;I)I
+#    move-result v3
+#    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+#    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
 
     return-void
 .end method
diff --git a/test/956-methodhandles/expected.txt b/test/956-methodhandles/expected.txt
index ddc1cb0..9b09327 100644
--- a/test/956-methodhandles/expected.txt
+++ b/test/956-methodhandles/expected.txt
@@ -3,3 +3,15 @@
 foo_A
 foo_B
 privateRyan_D
+Received exception: Expected (java.lang.String, java.lang.String)java.lang.String but was (java.lang.String, java.lang.Object)void
+String constructors done.
+testReferenceReturnValueConversions done.
+testPrimitiveReturnValueConversions done.
+Hi
+Hi
+Hi
+Hi
+Expect Hi here: Hi
+Don't expect Hi now
+[3, 2, 1]
+[1, 2, 3]
diff --git a/test/956-methodhandles/src/Main.java b/test/956-methodhandles/src/Main.java
index 42265a9..ee9c436 100644
--- a/test/956-methodhandles/src/Main.java
+++ b/test/956-methodhandles/src/Main.java
@@ -19,10 +19,20 @@
 import java.lang.invoke.MethodHandles.Lookup;
 import java.lang.invoke.MethodType;
 import java.lang.invoke.WrongMethodTypeException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
 public class Main {
 
   public static class A {
+    public A() {}
+
     public void foo() {
       System.out.println("foo_A");
     }
@@ -57,6 +67,15 @@
   public static void main(String[] args) throws Throwable {
     testfindSpecial_invokeSuperBehaviour();
     testfindSpecial_invokeDirectBehaviour();
+    testExceptionDetailMessages();
+    testfindVirtual();
+    testfindStatic();
+    testUnreflects();
+    testAsType();
+    testConstructors();
+    testStringConstructors();
+    testReturnValueConversions();
+    testVariableArity();
   }
 
   public static void testfindSpecial_invokeSuperBehaviour() throws Throwable {
@@ -103,6 +122,19 @@
       System.out.println("findSpecial(A.class, foo, .. D.class) unexpectedly succeeded.");
     } catch (IllegalAccessException expected) {
     }
+
+    // Check return type matches for find.
+    try {
+      B.lookup.findSpecial(A.class /* refC */, "foo",
+                           MethodType.methodType(int.class), B.class /* specialCaller */);
+      fail();
+    } catch (NoSuchMethodException e) {}
+    // Check constructors
+    try {
+      B.lookup.findSpecial(A.class /* refC */, "<init>",
+                           MethodType.methodType(void.class), B.class /* specialCaller */);
+      fail();
+    } catch (NoSuchMethodException e) {}
   }
 
   public static void testfindSpecial_invokeDirectBehaviour() throws Throwable {
@@ -127,6 +159,1311 @@
     } catch (IllegalAccessException expected) {
     }
   }
+
+  public static void testExceptionDetailMessages() throws Throwable {
+    MethodType concatType = MethodType.methodType(String.class, String.class);
+    MethodHandle concat = MethodHandles.lookup().findVirtual(String.class, "concat", concatType);
+    try {
+      // The call site has type (String, Object)void while the handle is (String, String)String,
+      // so invokeExact is expected to throw; the detail message is printed for comparison.
+      concat.invokeExact("a", new Object());
+      System.out.println("invokeExact(\"a\", new Object()) unexpectedly succeeded.");
+    } catch (WrongMethodTypeException wrongType) {
+      System.out.println("Received exception: " + wrongType.getMessage());
+    }
+  }
+
+  public interface Foo {
+    String foo();  // Interface methods are implicitly public and abstract.
+  }
+
+  public interface Bar extends Foo {
+    String bar();  // Implicitly public and abstract; foo() is inherited from Foo.
+  }
+
+  /**
+   * Superclass fixture for {@code testfindVirtual}: one method per access level
+   * (public, protected, package-private) that is looked up through {@code BarImpl}.
+   */
+  public static class BarSuper {
+    public String superPublicMethod() { return "superPublicMethod"; }
+
+    // Declared protected so that the access level matches what the name promises.
+    protected String superProtectedMethod() { return "superProtectedMethod"; }
+
+    // Package-private: no access modifier on purpose.
+    String superPackageMethod() { return "superPackageMethod"; }
+  }
+
+  /**
+   * Concrete fixture for the lookup tests: implements {@code Bar} (and through it {@code Foo}),
+   * extends {@code BarSuper}, and publishes a {@code Lookup} created inside this class.
+   */
+  public static class BarImpl extends BarSuper implements Bar {
+    public BarImpl() {}
+
+    @Override
+    public String foo() { return "foo"; }
+
+    @Override
+    public String bar() { return "bar"; }
+
+    public String add(int x, int y) {
+      return Arrays.toString(new int[] { x, y });
+    }
+
+    private String privateMethod() { return "privateMethod"; }
+
+    public static String staticMethod() { return staticString; }
+
+    private static String staticString;
+
+    // Static initializer: runs once when the class is initialized, not once per instance.
+    static {
+      staticString = Long.toString(System.currentTimeMillis());
+    }
+
+    // Created inside BarImpl, so lookups through it may reach BarImpl's private members.
+    static final MethodHandles.Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static void testfindVirtual() throws Throwable {
+    // Virtual lookups on static methods should not succeed.
+    try {
+      MethodHandles.lookup().findVirtual(
+          BarImpl.class, "staticMethod", MethodType.methodType(String.class));
+      System.out.println("findVirtual(staticMethod) unexpectedly succeeded");
+    } catch (IllegalAccessException expected) {}
+
+    // Virtual lookups on private methods should not succeed, unless the Lookup
+    // context had sufficient privileges.
+    try {
+      MethodHandles.lookup().findVirtual(
+          BarImpl.class, "privateMethod", MethodType.methodType(String.class));
+      System.out.println("findVirtual(privateMethod) unexpectedly succeeded");
+    } catch (IllegalAccessException expected) {}
+
+    // Virtual lookup on a private method with a context that *does* have sufficient
+    // privileges.
+    MethodHandle mh = BarImpl.lookup.findVirtual(
+            BarImpl.class,  "privateMethod", MethodType.methodType(String.class));
+    String str = (String) mh.invoke(new BarImpl());
+    if (!"privateMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#privateMethod: " + str);
+    }
+
+    // Find virtual must find interface methods that the class implements: foo() is
+    // declared by Foo, the super-interface of the Bar interface BarImpl implements.
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "foo",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"foo".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#foo: " + str);
+    }
+
+    // Find virtual should check rtype.
+    try {
+      mh = MethodHandles.lookup().findVirtual(BarImpl.class, "foo",
+                                              MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+
+    // And ptypes: the exact (int, int) signature resolves, a boxed Integer parameter must not.
+    mh = MethodHandles.lookup().findVirtual(
+        BarImpl.class, "add", MethodType.methodType(String.class, int.class, int.class));
+    try {
+      mh = MethodHandles.lookup().findVirtual(
+          BarImpl.class, "add", MethodType.methodType(String.class, Integer.class, int.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+
+    // .. and bar() is declared by Bar itself, which BarImpl implements directly.
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "bar",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"bar".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#bar: " + str);
+    }
+
+    // TODO(narayan): Fix this case, we're using the wrong ArtMethod for the
+    // invoke resulting in a failing check in the interpreter.
+    //
+    // mh = MethodHandles.lookup().findVirtual(Bar.class, "bar",
+    //    MethodType.methodType(String.class));
+    // str = (String) mh.invoke(new BarImpl());
+    // if (!"bar".equals(str)) {
+    //   System.out.println("Unexpected return value for BarImpl#bar: " + str);
+    // }
+
+    // We should also be able to lookup public / protected / package methods in
+    // the super class, given sufficient access privileges.
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "superPublicMethod",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"superPublicMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#superPublicMethod: " + str);
+    }
+
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "superProtectedMethod",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"superProtectedMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#superProtectedMethod: " + str);
+    }
+
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "superPackageMethod",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"superPackageMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#superPackageMethod: " + str);
+    }
+
+    // A constructor must not be found as a virtual method by its internal name.
+    try {
+      MethodHandles.lookup().findVirtual(BarImpl.class, "<init>",
+                                        MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+  }
+
+  /**
+   * findStatic must match the exact method type and must not expose initializers.
+   */
+  public static void testfindStatic() throws Throwable {
+    final MethodHandles.Lookup lookup = MethodHandles.lookup();
+    lookup.findStatic(BarImpl.class, "staticMethod", MethodType.methodType(String.class));
+    try {
+      lookup.findStatic(BarImpl.class, "staticMethod", MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException expected) {}
+    try {
+      lookup.findStatic(
+          BarImpl.class, "staticMethod", MethodType.methodType(String.class, int.class));
+      fail();
+    } catch (NoSuchMethodException expected) {}
+    try {
+      lookup.findStatic(BarImpl.class, "<clinit>", MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException expected) {}
+    try {
+      lookup.findStatic(BarImpl.class, "<init>", MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException expected) {}
+  }
+
+  /**
+   * Fixture for testUnreflects: public and private variants of fields and methods
+   * (static and instance), plus one private and one public constructor.
+   */
+  static class UnreflectTester {
+    // Instance fields; both are assigned by the private constructor.
+    public String publicField;
+    private String privateField;
+
+    // Static counterparts with fixed initial values.
+    public static String publicStaticField = "publicStaticValue";
+    private static String privateStaticField = "privateStaticValue";
+
+    // Private constructor: stores the same value in both instance fields.
+    private UnreflectTester(String val) {
+      this.publicField = val;
+      this.privateField = val;
+    }
+
+    // NOTE: The boolean constructor argument only exists to give this a
+    // different signature.
+    public UnreflectTester(String val, boolean unused) {
+      this(val);
+    }
+
+    // Each method returns its own name.
+    private static String privateStaticMethod() { return "privateStaticMethod"; }
+
+    private String privateMethod() { return "privateMethod"; }
+
+    public static String publicStaticMethod() { return "publicStaticMethod"; }
+
+    public String publicMethod() { return "publicMethod"; }
+  }
+
+  public static void testUnreflects() throws Throwable {
+    UnreflectTester instance = new UnreflectTester("unused");
+    Method publicMethod = UnreflectTester.class.getMethod("publicMethod");
+
+    MethodHandle mh = MethodHandles.lookup().unreflect(publicMethod);
+    assertEquals("publicMethod", (String) mh.invoke(instance));
+    assertEquals("publicMethod", (String) mh.invokeExact(instance));
+
+    Method publicStaticMethod = UnreflectTester.class.getMethod("publicStaticMethod");
+    mh = MethodHandles.lookup().unreflect(publicStaticMethod);
+    assertEquals("publicStaticMethod", (String) mh.invoke());
+    assertEquals("publicStaticMethod", (String) mh.invokeExact());
+
+    Method privateMethod = UnreflectTester.class.getDeclaredMethod("privateMethod");
+    try {
+      mh = MethodHandles.lookup().unreflect(privateMethod);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateMethod.setAccessible(true);
+    mh = MethodHandles.lookup().unreflect(privateMethod);
+    assertEquals("privateMethod", (String) mh.invoke(instance));
+    assertEquals("privateMethod", (String) mh.invokeExact(instance));
+
+    Method privateStaticMethod = UnreflectTester.class.getDeclaredMethod("privateStaticMethod");
+    try {
+      mh = MethodHandles.lookup().unreflect(privateStaticMethod);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateStaticMethod.setAccessible(true);
+    mh = MethodHandles.lookup().unreflect(privateStaticMethod);
+    assertEquals("privateStaticMethod", (String) mh.invoke());
+    assertEquals("privateStaticMethod", (String) mh.invokeExact());
+
+    Constructor privateConstructor = UnreflectTester.class.getDeclaredConstructor(String.class);
+    try {
+      mh = MethodHandles.lookup().unreflectConstructor(privateConstructor);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateConstructor.setAccessible(true);
+    mh = MethodHandles.lookup().unreflectConstructor(privateConstructor);
+    instance = (UnreflectTester) mh.invokeExact("abc");
+    assertEquals("abc", instance.publicField);
+    instance = (UnreflectTester) mh.invoke("def");
+    assertEquals("def", instance.publicField);
+    Constructor publicConstructor = UnreflectTester.class.getConstructor(String.class,
+        boolean.class);
+    mh = MethodHandles.lookup().unreflectConstructor(publicConstructor);
+    instance = (UnreflectTester) mh.invokeExact("abc", false);
+    assertEquals("abc", instance.publicField);
+    instance = (UnreflectTester) mh.invoke("def", true);
+    assertEquals("def", instance.publicField);
+
+    // TODO(narayan): Non exact invokes for field sets/gets are not implemented yet.
+    //
+    // assertEquals("instanceValue", (String) mh.invoke(new UnreflectTester("instanceValue")));
+    Field publicField = UnreflectTester.class.getField("publicField");
+    mh = MethodHandles.lookup().unreflectGetter(publicField);
+    instance = new UnreflectTester("instanceValue");
+    assertEquals("instanceValue", (String) mh.invokeExact(instance));
+
+    mh = MethodHandles.lookup().unreflectSetter(publicField);
+    instance = new UnreflectTester("instanceValue");
+    mh.invokeExact(instance, "updatedInstanceValue");
+    assertEquals("updatedInstanceValue", instance.publicField);
+
+    Field publicStaticField = UnreflectTester.class.getField("publicStaticField");
+    mh = MethodHandles.lookup().unreflectGetter(publicStaticField);
+    UnreflectTester.publicStaticField = "updatedStaticValue";
+    assertEquals("updatedStaticValue", (String) mh.invokeExact());
+
+    mh = MethodHandles.lookup().unreflectSetter(publicStaticField);
+    UnreflectTester.publicStaticField = "updatedStaticValue";
+    mh.invokeExact("updatedStaticValue2");
+    assertEquals("updatedStaticValue2", UnreflectTester.publicStaticField);
+
+    Field privateField = UnreflectTester.class.getDeclaredField("privateField");
+    try {
+      mh = MethodHandles.lookup().unreflectGetter(privateField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+    try {
+      mh = MethodHandles.lookup().unreflectSetter(privateField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+
+    privateField.setAccessible(true);
+
+    mh = MethodHandles.lookup().unreflectGetter(privateField);
+    instance = new UnreflectTester("instanceValue");
+    assertEquals("instanceValue", (String) mh.invokeExact(instance));
+
+    mh = MethodHandles.lookup().unreflectSetter(privateField);
+    instance = new UnreflectTester("instanceValue");
+    mh.invokeExact(instance, "updatedInstanceValue");
+    assertEquals("updatedInstanceValue", instance.privateField);
+
+    Field privateStaticField = UnreflectTester.class.getDeclaredField("privateStaticField");
+    try {
+      mh = MethodHandles.lookup().unreflectGetter(privateStaticField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+    try {
+      mh = MethodHandles.lookup().unreflectSetter(privateStaticField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+
+    privateStaticField.setAccessible(true);
+    mh = MethodHandles.lookup().unreflectGetter(privateStaticField);
+    privateStaticField.set(null, "updatedStaticValue");
+    assertEquals("updatedStaticValue", (String) mh.invokeExact());
+
+    mh = MethodHandles.lookup().unreflectSetter(privateStaticField);
+    privateStaticField.set(null, "updatedStaticValue");
+    mh.invokeExact("updatedStaticValue2");
+    assertEquals("updatedStaticValue2", (String) privateStaticField.get(null));
+  }
+
+  // Exists only to fool Jack's handling of types (see b/32536744); returns "foo" as a CharSequence.
+  public static CharSequence getSequence() {
+    return "foo";
+  }
+
+  public static void testAsType() throws Throwable {
+    // The type of this handle is (String, String)String.
+    MethodHandle mh = MethodHandles.lookup().findVirtual(String.class,
+        "concat", MethodType.methodType(String.class, String.class));
+
+    // Change it to (CharSequence, String)Object.
+    MethodHandle asType = mh.asType(
+        MethodType.methodType(Object.class, CharSequence.class, String.class));
+
+    Object obj = asType.invokeExact((CharSequence) getSequence(), "bar");
+    assertEquals("foobar", (String) obj);
+
+    // Should fail due to a wrong return type (the cast makes the call site return String, not Object).
+    try {
+      String str = (String) asType.invokeExact((CharSequence) getSequence(), "bar");
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Should fail due to a wrong argument type (String instead of CharSequence); the String cast mismatches too.
+    try {
+      String str = (String) asType.invokeExact("baz", "bar");
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Calls to asType should fail if the types are not convertible.
+    //
+    // Bad return type conversion (String result to int).
+    try {
+      mh.asType(MethodType.methodType(int.class, String.class, String.class));
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Bad argument conversion (int argument to the String receiver).
+    try {
+      mh.asType(MethodType.methodType(String.class, int.class, String.class));
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+  }
+
+  public static void assertTrue(boolean value) {
+    // Passing case first; only a false value reaches the throw below.
+    if (value) { return; }
+    throw new AssertionError("assertTrue value: " + value);
+  }
+
+  public static void assertFalse(boolean value) {
+    // The message names this helper so a failure is not mistaken for an assertTrue failure.
+    if (!value) { return; }
+    throw new AssertionError("assertFalse value: " + value);
+  }
+
+  public static void assertEquals(int i1, int i2) {
+    // Throws, with both operands in the message, when the two ints differ.
+    if (i1 != i2) { throw new AssertionError("assertEquals i1: " + i1 + ", i2: " + i2); }
+  }
+
+  public static void assertEquals(long i1, long i2) {
+    // Throws, with both operands in the message, when the two longs differ.
+    if (i1 != i2) { throw new AssertionError("assertEquals l1: " + i1 + ", l2: " + i2); }
+  }
+
+  public static void assertEquals(Object o, Object p) {
+    if (o == p) { return; }
+    if (o != null && p != null && o.equals(p)) { return; }
+    throw new AssertionError("assertEquals: o1: " + o + ", o2: " + p);
+  }
+
+  public static void assertEquals(String s1, String s2) {
+    // Identical references (this covers two nulls) are equal without calling equals().
+    final boolean sameReference = s1 == s2;
+    // Otherwise both must be non-null and have equal contents.
+    final boolean sameContents = !sameReference && s1 != null && s2 != null && s1.equals(s2);
+    if (sameReference || sameContents) {
+      return;
+    }
+
+    throw new AssertionError("assertEquals s1: " + s1 + ", s2: " + s2);
+  }
+
+  public static void fail() {
+    System.out.println("fail");  // Reports on stdout only; nothing is thrown, so the caller continues.
+    Thread.dumpStack();  // Dumps the current stack trace so the failing check can be located.
+  }
+
+  public static void fail(String message) {
+    System.out.println("fail: " + message);  // Reports on stdout only; nothing is thrown.
+    Thread.dumpStack();  // Dumps the current stack trace so the failing check can be located.
+  }
+
+  public static void testConstructors() throws Throwable {
+    // Float(float), called through both invokeExact and invoke.
+    MethodHandle mh = MethodHandles.lookup().findConstructor(
+        Float.class, MethodType.methodType(void.class, float.class));
+    Float value = (Float) mh.invokeExact(0.33f);
+    if (value.floatValue() != 0.33f) {
+      fail("Unexpected float value from invokeExact " + value.floatValue());
+    }
+
+    value = (Float) mh.invoke(3.34f);
+    if (value.floatValue() != 3.34f) {
+      fail("Unexpected float value from invoke " + value.floatValue());
+    }
+
+    // Double constructors: the failure messages report d, the Double under test.
+    mh = MethodHandles.lookup().findConstructor(Double.class,
+                                                MethodType.methodType(void.class, String.class));
+    Double d = (Double) mh.invoke("8.45e3");
+    if (d.doubleValue() != 8.45e3) {
+      fail("Unexpected double value from Double(String) " + d.doubleValue());
+    }
+
+    mh = MethodHandles.lookup().findConstructor(Double.class,
+                                                MethodType.methodType(void.class, double.class));
+    d = (Double) mh.invoke(8.45e3);
+    if (d.doubleValue() != 8.45e3) {
+      fail("Unexpected double value from Double(double) " + d.doubleValue());
+    }
+
+    // Primitive type
+    try {
+      mh = MethodHandles.lookup().findConstructor(int.class, MethodType.methodType(void.class));
+      fail("Unexpected lookup success for primitive constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Interface
+    try {
+      mh = MethodHandles.lookup().findConstructor(Readable.class,
+                                                  MethodType.methodType(void.class));
+      fail("Unexpected lookup success for interface constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Abstract: the lookup itself succeeds here; it is the instantiation that must fail.
+    mh = MethodHandles.lookup().findConstructor(Process.class, MethodType.methodType(void.class));
+    try {
+      mh.invoke();
+      fail("Unexpected ability to instantiate an abstract class");
+    } catch (InstantiationException e) {}
+
+    // Non-existent
+    try {
+      MethodHandles.lookup().findConstructor(
+          String.class, MethodType.methodType(String.class, Float.class));
+      fail("Unexpected success for non-existent constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Non-void constructor search. (I)I instead of (I)V.
+    try {
+      MethodHandles.lookup().findConstructor(
+          Integer.class, MethodType.methodType(Integer.class, Integer.class));
+      fail("Unexpected success for non-void type for findConstructor");
+    } catch (NoSuchMethodException e) {}
+  }
+
+  public static void testStringConstructors() throws Throwable {
+    final String testPattern = "The system as we know it is broken";
+
+    // String()
+    MethodHandle mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class));
+    String s = (String) mh.invokeExact();
+    if (!s.equals("")) {
+      fail("Unexpected empty string constructor result: '" + s + "'");
+    }
+
+    // String(String)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, String.class));
+    s = (String) mh.invokeExact(testPattern);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(char[])
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, char[].class));
+    s = (String) mh.invokeExact(testPattern.toCharArray());
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(char[], int, int)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, char[].class, int.class, int.class));
+    s = (String) mh.invokeExact(new char [] { 'a', 'b', 'c', 'd', 'e'}, 2, 3);
+    if (!s.equals("cde")) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(int[] codePoints, int offset, int count)
+    StringBuffer sb = new StringBuffer(testPattern);
+    int[] codePoints = new int[sb.codePointCount(0, sb.length())];
+    for (int i = 0; i < sb.length(); ++i) {
+      codePoints[i] = sb.codePointAt(i);
+    }
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, int[].class, int.class, int.class));
+    s = (String) mh.invokeExact(codePoints, 0, codePoints.length);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte ascii[], int hibyte, int offset, int count)
+    byte [] ascii = testPattern.getBytes(StandardCharsets.US_ASCII);
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, int.class, int.class));
+    s = (String) mh.invokeExact(ascii, 0, ascii.length);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length, String charsetName)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, int.class, int.class, String.class));
+    s = (String) mh.invokeExact(ascii, 0, 5, StandardCharsets.US_ASCII.name());
+    if (!s.equals(testPattern.substring(0, 5))) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length, Charset charset)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, int.class, int.class, Charset.class));
+    s = (String) mh.invokeExact(ascii, 0, 5, StandardCharsets.US_ASCII);
+    if (!s.equals(testPattern.substring(0, 5))) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], String charsetName)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, String.class));
+    s = (String) mh.invokeExact(ascii, StandardCharsets.US_ASCII.name());
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], Charset charset)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, Charset.class));
+    s = (String) mh.invokeExact(ascii, StandardCharsets.US_ASCII);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, int.class, int.class));
+    s = (String) mh.invokeExact(ascii, 1, ascii.length - 2);
+    s = testPattern.charAt(0) + s + testPattern.charAt(testPattern.length() - 1);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[])
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class));
+    s = (String) mh.invokeExact(ascii);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(StringBuffer buffer)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, StringBuffer.class));
+    s = (String) mh.invokeExact(sb);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    System.out.println("String constructors done.");
+  }
+
+  /**
+   * Checks return value conversions for a handle whose target returns a reference type.
+   *
+   * The handle under test is {@code Float.valueOf(String)}. Each section invokes it with a
+   * different call-site return type and checks either the returned value or the exception
+   * that is thrown. Prints "testReferenceReturnValueConversions done." on success.
+   */
+  private static void testReferenceReturnValueConversions() throws Throwable {
+    MethodHandle mh = MethodHandles.lookup().findStatic(
+        Float.class, "valueOf", MethodType.methodType(Float.class, String.class));
+
+    // No conversion
+    Float f = (Float) mh.invokeExact("1.375");
+    if (f.floatValue() != 1.375) {
+      fail();
+    }
+    f = (Float) mh.invoke("1.875");
+    if (f.floatValue() != 1.875) {
+      fail();
+    }
+
+    // Bad conversion
+    // Float -> int is expected to be rejected by both invokeExact and invoke.
+    try {
+      int i = (int) mh.invokeExact("7.77");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      int i = (int) mh.invoke("7.77");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Assignment to super-class.
+    // invoke is expected to succeed; invokeExact is expected to reject the (String)Number
+    // call site because it differs from the handle's (String)Float type.
+    Number n = (Number) mh.invoke("1.11");
+    try {
+      Number o = (Number) mh.invokeExact("1.11");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Assignment to widened boxed primitive class.
+    // With invoke the failure is expected as a ClassCastException rather than a
+    // WrongMethodTypeException.
+    try {
+      Double u = (Double) mh.invoke("1.11");
+      fail();
+    } catch (ClassCastException e) {}
+
+    try {
+      Double v = (Double) mh.invokeExact("1.11");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Unboxed
+    float p = (float) mh.invoke("1.11");
+    if (p != 1.11f) {
+      fail();
+    }
+
+    // Unboxed and widened
+    double d = (double) mh.invoke("2.5");
+    if (d != 2.5) {
+      fail();
+    }
+
+    // Interface
+    Comparable<Float> c = (Comparable<Float>) mh.invoke("2.125");
+    if (c.compareTo(new Float(2.125f)) != 0) {
+      fail();
+    }
+
+    System.out.println("testReferenceReturnValueConversions done.");
+  }
+
+  /**
+   * Checks return value conversions for handles whose targets return a primitive,
+   * {@code void}, or a boxed {@code Boolean}.
+   *
+   * {@code invokeExact} is expected to throw {@code WrongMethodTypeException} whenever the
+   * call-site return type differs from the target's. {@code invoke} is expected to apply only
+   * the conversions allowed by {@code MethodHandle.asType}: widening, boxing and unboxing
+   * succeed, narrowing and unrelated boxing are rejected. Prints
+   * "testPrimitiveReturnValueConversions done." on success.
+   */
+  private static void testPrimitiveReturnValueConversions() throws Throwable {
+    MethodHandle mh = MethodHandles.lookup().findStatic(
+        Math.class, "min", MethodType.methodType(int.class, int.class, int.class));
+
+    final int SMALL = -8972;
+    final int LARGE = 7932529;
+
+    // No conversion
+    if ((int) mh.invokeExact(LARGE, SMALL) != SMALL) {
+      fail();
+    } else if ((int) mh.invoke(LARGE, SMALL) != SMALL) {
+      fail();
+    } else if ((int) mh.invokeExact(SMALL, LARGE) != SMALL) {
+      fail();
+    } else if ((int) mh.invoke(SMALL, LARGE) != SMALL) {
+      fail();
+    }
+
+    // int -> long
+    // The empty if-body only gives the call a (int,int)long call site; reaching fail()
+    // means invokeExact did not throw.
+    try {
+      if ((long) mh.invokeExact(LARGE, SMALL) != (long) SMALL) {}
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    if ((long) mh.invoke(LARGE, SMALL) != (long) SMALL) {
+      fail();
+    }
+
+    // int -> short
+    try {
+      if ((short) mh.invokeExact(LARGE, SMALL) != (short) SMALL) {}
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Narrowing is not a permitted return conversion, so invoke must throw as well. SMALL
+    // fits in a short, so comparing the value would not detect a silent narrowing.
+    try {
+      if ((short) mh.invoke(LARGE, SMALL) != (short) SMALL) {}
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // int -> Integer
+    try {
+      if (!((Integer) mh.invokeExact(LARGE, SMALL)).equals(new Integer(SMALL))) {}
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    if (!((Integer) mh.invoke(LARGE, SMALL)).equals(new Integer(SMALL))) {
+      fail();
+    }
+
+    // int -> Long
+    try {
+      Long l = (Long) mh.invokeExact(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      Long l = (Long) mh.invoke(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // int -> Short
+    try {
+      Short s = (Short) mh.invokeExact(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      Short s = (Short) mh.invoke(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // int -> Process
+    try {
+      Process p = (Process) mh.invokeExact(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      Process p = (Process) mh.invoke(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // void -> Object
+    mh = MethodHandles.lookup().findStatic(System.class, "gc", MethodType.methodType(void.class));
+    Object o = (Object) mh.invoke();
+    if (o != null) fail();
+
+    // void -> long
+    long l = (long) mh.invoke();
+    if (l != 0) fail();
+
+    // boolean -> Boolean
+    mh = MethodHandles.lookup().findStatic(Boolean.class, "parseBoolean",
+                                           MethodType.methodType(boolean.class, String.class));
+    Boolean z = (Boolean) mh.invoke("True");
+    if (!z.booleanValue()) fail();
+
+    // boolean -> int
+    try {
+      int dummy = (int) mh.invoke("True");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // boolean -> Integer
+    try {
+      Integer dummy = (Integer) mh.invoke("True");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Boolean -> boolean
+    mh = MethodHandles.lookup().findStatic(Boolean.class, "valueOf",
+                                           MethodType.methodType(Boolean.class, boolean.class));
+    boolean w = (boolean) mh.invoke(false);
+    if (w) fail();
+
+    // Boolean -> int
+    try {
+      int dummy = (int) mh.invoke(false);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Boolean -> Integer
+    // Pass a real boolean so that only the return conversion is exercised. A
+    // reference-to-reference return conversion is implemented as a cast, so the mismatch
+    // shows up as a ClassCastException, as in the Float -> Double case of
+    // testReferenceReturnValueConversions.
+    try {
+      Integer dummy = (Integer) mh.invoke(false);
+      fail();
+    } catch (ClassCastException e) {}
+
+    System.out.println("testPrimitiveReturnValueConversions done.");
+  }
+
+  /**
+   * Runs the return value conversion tests: first the reference-returning cases, then the
+   * primitive-returning cases. Each of them prints its own "done" line.
+   */
+  public static void testReturnValueConversions() throws Throwable {
+    testReferenceReturnValueConversions();
+    testPrimitiveReturnValueConversions();
+  }
+
+  /**
+   * Base class providing one variable arity method. Its result is prefixed with "base " so
+   * that a caller can tell this implementation apart from an override in a subclass.
+   */
+  public static class BaseVariableArityTester {
+    /** Returns "base ", then {@code f0}, then ", " and the array rendering of {@code floats}. */
+    public String update(Float f0, Float... floats) {
+      final StringBuilder text = new StringBuilder("base ");
+      text.append(f0);
+      text.append(", ");
+      text.append(Arrays.toString(floats));
+      return text.toString();
+    }
+  }
+
+  public static class VariableArityTester extends BaseVariableArityTester {
+    private String lastResult;
+
+    // Constructors
+    public VariableArityTester() {}
+    public VariableArityTester(boolean... booleans) { update(booleans); }
+    public VariableArityTester(byte... bytes) { update(bytes); }
+    public VariableArityTester(char... chars) { update(chars); }
+    public VariableArityTester(short... shorts) { update(shorts); }
+    public VariableArityTester(int... ints) { update(ints); }
+    public VariableArityTester(long... longs) { update(longs); }
+    public VariableArityTester(float... floats) { update(floats); }
+    public VariableArityTester(double... doubles) { update(doubles); }
+    public VariableArityTester(Float f0, Float... floats) { update(f0, floats); }
+    public VariableArityTester(String s0, String... strings) { update(s0, strings); }
+    public VariableArityTester(char c, Number... numbers) { update(c, numbers); }
+    @SafeVarargs
+    public VariableArityTester(ArrayList<Integer> l0, ArrayList<Integer>... lists) {
+      update(l0, lists);
+    }
+    public VariableArityTester(List l0, List... lists) { update(l0, lists); }
+
+    // Methods
+    public String update(boolean... booleans) { return lastResult = tally(booleans); }
+    public String update(byte... bytes) { return lastResult = tally(bytes); }
+    public String update(char... chars) { return lastResult = tally(chars); }
+    public String update(short... shorts) { return lastResult = tally(shorts); }
+    public String update(int... ints) {
+      lastResult = tally(ints);
+      return lastResult;
+    }
+    public String update(long... longs) { return lastResult = tally(longs); }
+    public String update(float... floats) { return lastResult = tally(floats); }
+    public String update(double... doubles) { return lastResult = tally(doubles); }
+    @Override
+    public String update(Float f0, Float... floats) { return lastResult = tally(f0, floats); }
+    public String update(String s0, String... strings) { return lastResult = tally(s0, strings); }
+    public String update(char c, Number... numbers) { return lastResult = tally(c, numbers); }
+    @SafeVarargs
+    public final String update(ArrayList<Integer> l0, ArrayList<Integer>... lists) {
+      lastResult = tally(l0, lists);
+      return lastResult;
+    }
+    public String update(List l0, List... lists) { return lastResult = tally(l0, lists); }
+
+    public String arrayMethod(Object[] o) {
+      return Arrays.deepToString(o);
+    }
+
+    public String lastResult() { return lastResult; }
+
+    // Static Methods
+    public static String tally(boolean... booleans) { return Arrays.toString(booleans); }
+    public static String tally(byte... bytes) { return Arrays.toString(bytes); }
+    public static String tally(char... chars) { return Arrays.toString(chars); }
+    public static String tally(short... shorts) { return Arrays.toString(shorts); }
+    public static String tally(int... ints) { return Arrays.toString(ints); }
+    public static String tally(long... longs) { return Arrays.toString(longs); }
+    public static String tally(float... floats) { return Arrays.toString(floats); }
+    public static String tally(double... doubles) { return Arrays.toString(doubles); }
+    public static String tally(Float f0, Float... floats) {
+      return f0 + ", " + Arrays.toString(floats);
+    }
+    public static String tally(String s0, String... strings) {
+      return s0 + ", " + Arrays.toString(strings);
+    }
+    public static String tally(char c, Number... numbers) {
+      return c + ", " + Arrays.toString(numbers);
+    }
+    @SafeVarargs
+    public static String tally(ArrayList<Integer> l0, ArrayList<Integer>... lists) {
+      return Arrays.toString(l0.toArray()) + ", " + Arrays.deepToString(lists);
+    }
+    public static String tally(List l0, List... lists) {
+      return Arrays.deepToString(l0.toArray()) + ", " + Arrays.deepToString(lists);
+    }
+    public static void foo(int... ints) { System.out.println(Arrays.toString(ints)); }
+    public static long sumToPrimitive(int... ints) {
+      long result = 0;
+      for (int i : ints) result += i;
+      return result;
+    }
+    public static Long sumToReference(int... ints) {
+      System.err.println("Hi");
+      return new Long(sumToPrimitive(ints));
+    }
+    public static MethodHandles.Lookup lookup() {
+      return MethodHandles.lookup();
+    }
+  }
+
+  // Hands back the given array typed as a plain Object. The only purpose is to work around
+  // the way Jack handles types. See b/32536744.
+  public static Object getAsObject(String[] strings) {
+    final Object asObject = strings;
+    return asObject;
+  }
+
+  /**
+   * Exercises method handles for the variable arity members of {@code VariableArityTester}.
+   *
+   * The sections cover instance methods, a non-varargs array method, constructors, static
+   * methods, a {@code findSpecial} lookup of the base class method, return value conversions
+   * and {@code bindTo} / {@code asVarargsCollector}. In general {@code invoke} is expected to
+   * collect trailing arguments into the array parameter, while {@code invokeExact} is expected
+   * to accept only a call site that passes the array itself.
+   */
+  public static void testVariableArity() throws Throwable {
+    MethodHandle mh;
+    VariableArityTester vat = new VariableArityTester();
+
+    // Direct calls, without a method handle, as a baseline.
+    assertEquals("[1]", vat.update(1));
+    assertEquals("[1, 1]", vat.update(1, 1));
+    assertEquals("[1, 1, 1]", vat.update(1, 1, 1));
+
+    // Methods - boolean
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, boolean[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertFalse(mh.asFixedArity().isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[true, false, true]", mh.invoke(vat, true, false, true));
+    assertEquals("[true, false, true]", mh.invoke(vat, new boolean[] { true, false, true}));
+    assertEquals("[false, true]", mh.invoke(vat, Boolean.valueOf(false), Boolean.valueOf(true)));
+    try {
+      mh.invoke(vat, true, true, 0);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    // A null Boolean cannot be unboxed into the boolean[] element.
+    try {
+      assertEquals("[false, true]", mh.invoke(vat, Boolean.valueOf(false), (Boolean) null));
+      fail();
+    } catch (NullPointerException e) {}
+
+    // Methods - byte
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, byte[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[32, 64, 97]", mh.invoke(vat, (byte) 32, Byte.valueOf((byte) 64), (byte) 97));
+    assertEquals("[32, 64, 97]", mh.invoke(vat, new byte[] {(byte) 32, (byte) 64, (byte) 97}));
+    try {
+      mh.invoke(vat, (byte) 1, Integer.valueOf(3), (byte) 0);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Methods - char
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, char[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[A, B, C]", mh.invoke(vat, 'A', Character.valueOf('B'), 'C'));
+    assertEquals("[W, X, Y, Z]", mh.invoke(vat, new char[] { 'W', 'X', 'Y', 'Z' }));
+
+    // Methods - short
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, short[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[32767, -32768, 0]",
+                 mh.invoke(vat, Short.MAX_VALUE, Short.MIN_VALUE, Short.valueOf((short) 0)));
+    assertEquals("[1, -1]", mh.invoke(vat, new short[] { (short) 1, (short) -1 }));
+
+    // Methods - int
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, int[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[0, 2147483647, -2147483648, 0]",
+                 mh.invoke(vat, Integer.valueOf(0), Integer.MAX_VALUE, Integer.MIN_VALUE, 0));
+    assertEquals("[0, -1, 1, 0]", mh.invoke(vat, new int[] { 0, -1, 1, 0 }));
+
+    // invokeExact is expected to accept the array form only; invoke also accepts the
+    // spread-out arguments.
+    assertEquals("[5, 4, 3, 2, 1]", (String) mh.invokeExact(vat, new int [] { 5, 4, 3, 2, 1 }));
+    try {
+      assertEquals("[5, 4, 3, 2, 1]", (String) mh.invokeExact(vat, 5, 4, 3, 2, 1));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    assertEquals("[5, 4, 3, 2, 1]", (String) mh.invoke(vat, 5, 4, 3, 2, 1));
+
+    // Methods - long
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, long[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[0, 9223372036854775807, -9223372036854775808]",
+                 mh.invoke(vat, Long.valueOf(0), Long.MAX_VALUE, Long.MIN_VALUE));
+    assertEquals("[0, -1, 1, 0]", mh.invoke(vat, new long[] { 0, -1, 1, 0 }));
+
+    // Methods - float
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, float[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[0.0, 1.25, -1.25]",
+                 mh.invoke(vat, 0.0f, Float.valueOf(1.25f), Float.valueOf(-1.25f)));
+    assertEquals("[0.0, -1.0, 1.0, 0.0]",
+                 mh.invoke(vat, new float[] { 0.0f, -1.0f, 1.0f, 0.0f }));
+
+    // Methods - double
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, double[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke(vat));
+    assertEquals("[0.0, 1.25, -1.25]",
+                 mh.invoke(vat, 0.0, Double.valueOf(1.25), Double.valueOf(-1.25)));
+    assertEquals("[0.0, -1.0, 1.0, 0.0]",
+                 mh.invoke(vat, new double[] { 0.0, -1.0, 1.0, 0.0 }));
+    // Mixed float and double arguments; only checks that the call completes, the result is
+    // not compared.
+    mh.invoke(vat, 0.3f, 1.33, 1.33);
+
+    // Methods - String
+    mh = MethodHandles.lookup().
+        findVirtual(VariableArityTester.class, "update",
+                    MethodType.methodType(String.class, String.class, String[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("Echidna, []", mh.invoke(vat, "Echidna"));
+    assertEquals("Bongo, [Jerboa, Okapi]",
+                 mh.invoke(vat, "Bongo", "Jerboa", "Okapi"));
+
+    // Methods - Float
+    mh = MethodHandles.lookup().
+        findVirtual(VariableArityTester.class, "update",
+                    MethodType.methodType(String.class, Float.class, Float[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invoke(vat, Float.valueOf(9.99f),
+                                    new Float[] { Float.valueOf(0.0f),
+                                                  Float.valueOf(0.1f),
+                                                  Float.valueOf(1.1f) }));
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invoke(vat, Float.valueOf(9.99f), Float.valueOf(0.0f),
+                                    Float.valueOf(0.1f), Float.valueOf(1.1f)));
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invoke(vat, Float.valueOf(9.99f), 0.0f, 0.1f, 1.1f));
+    // int arguments are expected to be rejected for the Float[] parameter.
+    try {
+      assertEquals("9.99, [77.0, 33.0, 64.0]",
+                   (String) mh.invoke(vat, Float.valueOf(9.99f), 77, 33, 64));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invokeExact(vat, Float.valueOf(9.99f),
+                                         new Float[] { Float.valueOf(0.0f),
+                                                       Float.valueOf(0.1f),
+                                                       Float.valueOf(1.1f) }));
+    assertEquals("9.99, [0.0, null, 1.1]",
+                 (String) mh.invokeExact(vat, Float.valueOf(9.99f),
+                                         new Float[] { Float.valueOf(0.0f),
+                                                       null,
+                                                       Float.valueOf(1.1f) }));
+    try {
+      assertEquals("9.99, [0.0, 0.1, 1.1]",
+                   (String) mh.invokeExact(vat, Float.valueOf(9.99f), 0.0f, 0.1f, 1.1f));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Methods - Number
+    mh = MethodHandles.lookup().
+        findVirtual(VariableArityTester.class, "update",
+                    MethodType.methodType(String.class, char.class, Number[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertFalse(mh.asFixedArity().isVarargsCollector());
+    assertEquals("x, []",  (String) mh.invoke(vat, 'x'));
+    assertEquals("x, [3.141]", (String) mh.invoke(vat, 'x', 3.141));
+    assertEquals("x, [null, 3.131, 37]",
+                 (String) mh.invoke(vat, 'x', null, 3.131, new Integer(37)));
+    // A String among the trailing arguments is expected to fail with ClassCastException.
+    try {
+      assertEquals("x, [null, 3.131, bad, 37]",
+                   (String) mh.invoke(vat, 'x', null, 3.131, "bad", new Integer(37)));
+      assertTrue(false);
+      fail();
+    } catch (ClassCastException e) {}
+    try {
+      assertEquals("x, [null, 3.131, bad, 37]",
+                   (String) mh.invoke(
+                       vat, 'x', (Process) null, 3.131, "bad", new Integer(37)));
+      assertTrue(false);
+      fail();
+    } catch (ClassCastException e) {}
+
+    // Methods - an array method that is not variable arity.
+    mh = MethodHandles.lookup().findVirtual(
+        VariableArityTester.class, "arrayMethod",
+        MethodType.methodType(String.class, Object[].class));
+    assertFalse(mh.isVarargsCollector());
+    mh.invoke(vat, new Object[] { "123" });
+    try {
+      assertEquals("-", mh.invoke(vat, new Float(3), new Float(4)));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    // After asVarargsCollector the same spread-out call is expected to succeed.
+    mh = mh.asVarargsCollector(Object[].class);
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[3.0, 4.0]", (String) mh.invoke(vat, new Float(3), new Float(4)));
+
+    // Constructors - default
+    mh = MethodHandles.lookup().findConstructor(
+        VariableArityTester.class, MethodType.methodType(void.class));
+    assertFalse(mh.isVarargsCollector());
+
+    // Constructors - boolean
+    mh = MethodHandles.lookup().findConstructor(
+        VariableArityTester.class, MethodType.methodType(void.class, boolean[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[true, true, false]",
+                 ((VariableArityTester) mh.invoke(new boolean[] {true, true, false})).lastResult());
+    assertEquals("[true, true, false]",
+                 ((VariableArityTester) mh.invoke(true, true, false)).lastResult());
+    try {
+      assertEquals("[true, true, false]",
+                   ((VariableArityTester) mh.invokeExact(true, true, false)).lastResult());
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Constructors - byte
+    mh = MethodHandles.lookup().findConstructor(
+        VariableArityTester.class, MethodType.methodType(void.class, byte[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[55, 66, 60]",
+                 ((VariableArityTester)
+                  mh.invoke(new byte[] {(byte) 55, (byte) 66, (byte) 60})).lastResult());
+    assertEquals("[55, 66, 60]",
+                 ((VariableArityTester) mh.invoke(
+                     (byte) 55, (byte) 66, (byte) 60)).lastResult());
+    try {
+      assertEquals("[55, 66, 60]",
+                   ((VariableArityTester) mh.invokeExact(
+                       (byte) 55, (byte) 66, (byte) 60)).lastResult());
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    // A Number[] is expected to be rejected where a byte[] is required.
+    try {
+      assertEquals("[3, 3]",
+                   ((VariableArityTester) mh.invoke(
+                       new Number[] { Byte.valueOf((byte) 3), (byte) 3})).lastResult());
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Constructors - String (have a different path than other reference types).
+    mh = MethodHandles.lookup().findConstructor(
+        VariableArityTester.class, MethodType.methodType(void.class, String.class, String[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("x, []", ((VariableArityTester) mh.invoke("x")).lastResult());
+    assertEquals("x, [y]", ((VariableArityTester) mh.invoke("x", "y")).lastResult());
+    assertEquals("x, [y, z]",
+                 ((VariableArityTester) mh.invoke("x", new String[] { "y", "z" })).lastResult());
+    try {
+      assertEquals("x, [y]", ((VariableArityTester) mh.invokeExact("x", "y")).lastResult());
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    assertEquals("x, [null, z]",
+                 ((VariableArityTester) mh.invoke("x", new String[] { null, "z" })).lastResult());
+
+    // Constructors - Number
+    mh = MethodHandles.lookup().findConstructor(
+        VariableArityTester.class, MethodType.methodType(void.class, char.class, Number[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertFalse(mh.asFixedArity().isVarargsCollector());
+    assertEquals("x, []", ((VariableArityTester) mh.invoke('x')).lastResult());
+    assertEquals("x, [3.141]", ((VariableArityTester) mh.invoke('x', 3.141)).lastResult());
+    assertEquals("x, [null, 3.131, 37]",
+                 ((VariableArityTester) mh.invoke('x', null, 3.131, new Integer(37))).lastResult());
+    try {
+      assertEquals("x, [null, 3.131, bad, 37]",
+                   ((VariableArityTester) mh.invoke(
+                       'x', null, 3.131, "bad", new Integer(37))).lastResult());
+      assertTrue(false);
+      fail();
+    } catch (ClassCastException e) {}
+    try {
+      assertEquals("x, [null, 3.131, bad, 37]",
+                   ((VariableArityTester) mh.invoke(
+                       'x', (Process) null, 3.131, "bad", new Integer(37))).lastResult());
+      assertTrue(false);
+      fail();
+    } catch (ClassCastException e) {}
+
+    // Static Methods - Float
+    mh = MethodHandles.lookup().
+        findStatic(VariableArityTester.class, "tally",
+                   MethodType.methodType(String.class, Float.class, Float[].class));
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invoke(Float.valueOf(9.99f),
+                                    new Float[] { Float.valueOf(0.0f),
+                                                  Float.valueOf(0.1f),
+                                                  Float.valueOf(1.1f) }));
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invoke(Float.valueOf(9.99f), Float.valueOf(0.0f),
+                                    Float.valueOf(0.1f), Float.valueOf(1.1f)));
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invoke(Float.valueOf(9.99f), 0.0f, 0.1f, 1.1f));
+    try {
+      assertEquals("9.99, [77.0, 33.0, 64.0]",
+                   (String) mh.invoke(Float.valueOf(9.99f), 77, 33, 64));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    assertEquals("9.99, [0.0, 0.1, 1.1]",
+                 (String) mh.invokeExact(Float.valueOf(9.99f),
+                                         new Float[] { Float.valueOf(0.0f),
+                                                       Float.valueOf(0.1f),
+                                                       Float.valueOf(1.1f) }));
+    assertEquals("9.99, [0.0, null, 1.1]",
+                 (String) mh.invokeExact(Float.valueOf(9.99f),
+                                         new Float[] { Float.valueOf(0.0f),
+                                                       null,
+                                                       Float.valueOf(1.1f) }));
+    try {
+      assertEquals("9.99, [0.0, 0.1, 1.1]",
+                   (String) mh.invokeExact(Float.valueOf(9.99f), 0.0f, 0.1f, 1.1f));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Special methods - Float
+    // findSpecial targets the base class implementation, hence the "base " prefix in the
+    // expected strings even though vat is a VariableArityTester.
+    mh = VariableArityTester.lookup().
+            findSpecial(BaseVariableArityTester.class, "update",
+                        MethodType.methodType(String.class, Float.class, Float[].class),
+                        VariableArityTester.class);
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("base 9.99, [0.0, 0.1, 1.1]",
+    (String) mh.invoke(vat,
+                       Float.valueOf(9.99f),
+                       new Float[] { Float.valueOf(0.0f),
+                                     Float.valueOf(0.1f),
+                                     Float.valueOf(1.1f) }));
+    assertEquals("base 9.99, [0.0, 0.1, 1.1]",
+    (String) mh.invoke(vat, Float.valueOf(9.99f), Float.valueOf(0.0f),
+                       Float.valueOf(0.1f), Float.valueOf(1.1f)));
+
+    // Return value conversions.
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, int[].class));
+    assertEquals("[1, 2, 3]", (String) mh.invoke(vat, 1, 2, 3));
+    assertEquals("[1, 2, 3]", (Object) mh.invoke(vat, 1, 2, 3));
+    try {
+      assertEquals("[1, 2, 3, 4]", (long) mh.invoke(vat, 1, 2, 3));
+      fail();
+    } catch (WrongMethodTypeException e) {}
+    // The rejected call above must not have replaced the previously stored result.
+    assertEquals("[1, 2, 3]", vat.lastResult());
+    mh = MethodHandles.lookup().findStatic(VariableArityTester.class, "sumToPrimitive",
+                                           MethodType.methodType(long.class, int[].class));
+    assertEquals(10l, (long) mh.invoke(1, 2, 3, 4));
+    assertEquals(Long.valueOf(10l), (Long) mh.invoke(1, 2, 3, 4));
+    mh = MethodHandles.lookup().findStatic(VariableArityTester.class, "sumToReference",
+                                           MethodType.methodType(Long.class, int[].class));
+    // Each successful sumToReference call prints "Hi" to standard error.
+    Object o = mh.invoke(1, 2, 3, 4);
+    long l = (long) mh.invoke(1, 2, 3, 4);
+    assertEquals(10l, (long) mh.invoke(1, 2, 3, 4));
+    assertEquals(Long.valueOf(10l), (Long) mh.invoke(1, 2, 3, 4));
+    try {
+      // The target is expected to run here (printing "Hi"); the Long it returns then fails
+      // the cast to Byte with a ClassCastException.
+      System.err.print("Expect Hi here: ");
+      assertEquals(Long.valueOf(10l), (Byte) mh.invoke(1, 2, 3, 4));
+      fail();
+    } catch (ClassCastException e) {}
+    try {
+      // WrongMethodTypeException should be raised before invoke here.
+      System.err.println("Don't expect Hi now");
+      byte b = (byte) mh.invoke(1, 2, 3, 4);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Return void produces 0 / null.
+    mh = MethodHandles.lookup().findStatic(VariableArityTester.class, "foo",
+                                           MethodType.methodType(void.class, int[].class));
+    assertEquals(null, (Object) mh.invoke(3, 2, 1));
+    assertEquals(0l, (long) mh.invoke(1, 2, 3));
+
+    // Combinators
+    // bindTo is expected to drop the varargs collector property, which asVarargsCollector
+    // then restores.
+    mh = MethodHandles.lookup().findVirtual(VariableArityTester.class, "update",
+                                            MethodType.methodType(String.class, boolean[].class));
+    assertTrue(mh.isVarargsCollector());
+    mh = mh.bindTo(vat);
+    assertFalse(mh.isVarargsCollector());
+    mh = mh.asVarargsCollector(boolean[].class);
+    assertTrue(mh.isVarargsCollector());
+    assertEquals("[]", mh.invoke());
+    assertEquals("[true, false, true]", mh.invoke(true, false, true));
+    assertEquals("[true, false, true]", mh.invoke(new boolean[] { true, false, true}));
+    assertEquals("[false, true]", mh.invoke(Boolean.valueOf(false), Boolean.valueOf(true)));
+    try {
+      mh.invoke(true, true, 0);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+  }
 }
-
-
diff --git a/test/957-methodhandle-transforms/expected.txt b/test/957-methodhandle-transforms/expected.txt
index 73a34bc..7540ef7 100644
--- a/test/957-methodhandle-transforms/expected.txt
+++ b/test/957-methodhandle-transforms/expected.txt
@@ -1,35 +1,18 @@
----
--- testDelegation
----
-boolean: false
-char: h
-short: 56
-int: 72
-long: 2147483689
-float: 0.56
-double: 100.0
-String: hello
-Object: goodbye
-boolean: false
-char: h
-short: 56
-int: 72
-long: 73
-float: 0.56
-double: 100.0
-String: hello
-Object: goodbye
-true
-true
-a
-a
-42
-42
-43
-43
-43.0
-43.0
-43.0
-43.0
-plank
-plank
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Target: Arg1: foo, Arg2: 42
+Target: Arg1: foo, Arg2: 42
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo, Arg2: 42, ExMsg: exceptionMessage
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo, Arg2: 42, ExMsg: exceptionMessage
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo
+target: target, 42, 56
+target: target, 42, 56
+fallback: fallback, 42, 56
+target: target, 42, 56
+target: target, 42, 56
diff --git a/test/957-methodhandle-transforms/src/Main.java b/test/957-methodhandle-transforms/src/Main.java
index e9d313b..5806509 100644
--- a/test/957-methodhandle-transforms/src/Main.java
+++ b/test/957-methodhandle-transforms/src/Main.java
@@ -19,166 +19,20 @@
 import java.lang.invoke.MethodHandles.Lookup;
 import java.lang.invoke.MethodType;
 import java.lang.invoke.WrongMethodTypeException;
-import java.lang.invoke.Transformers.Transformer;
-
-import dalvik.system.EmulatedStackFrame;
 
 public class Main {
-
-  public static void testDelegate_allTypes(boolean z, char a, short b, int c, long d,
-                                           float e, double f, String g, Object h) {
-    System.out.println("boolean: " + z);
-    System.out.println("char: " + a);
-    System.out.println("short: " + b);
-    System.out.println("int: " + c);
-    System.out.println("long: " + d);
-    System.out.println("float: " + e);
-    System.out.println("double: " + f);
-    System.out.println("String: " + g);
-    System.out.println("Object: " + h);
-  }
-
-  public static boolean testDelegate_returnBoolean() {
-    return true;
-  }
-
-  public static char testDelegate_returnChar() {
-    return 'a';
-  }
-
-  public static int testDelegate_returnInt() {
-    return 42;
-  }
-
-  public static long testDelegate_returnLong() {
-    return 43;
-  }
-
-  public static float testDelegate_returnFloat() {
-    return 43.0f;
-  }
-
-  public static double testDelegate_returnDouble() {
-    return 43.0;
-  }
-
-  public static String testDelegate_returnString() {
-    return "plank";
-  }
-
-  public static class DelegatingTransformer extends Transformer {
-    private final MethodHandle delegate;
-
-    public DelegatingTransformer(MethodHandle delegate) {
-      super(delegate.type());
-      this.delegate = delegate;
-    }
-
-    @Override
-    public void transform(EmulatedStackFrame stackFrame) throws Throwable {
-      delegate.invoke(stackFrame);
-    }
-  }
-
   public static void main(String[] args) throws Throwable {
     testThrowException();
-
-    testDelegation();
-  }
-
-  public static void testDelegation() throws Throwable {
-    System.out.println("---");
-    System.out.println("-- testDelegation");
-    System.out.println("---");
-
-    MethodHandle specialFunctionHandle = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_allTypes", MethodType.methodType(void.class,
-          new Class<?>[] { boolean.class, char.class, short.class, int.class, long.class,
-            float.class, double.class, String.class, Object.class }));
-
-    DelegatingTransformer delegate = new DelegatingTransformer(specialFunctionHandle);
-
-    // Test an exact invoke.
-    //
-    // Note that the shorter form below doesn't work and must be
-    // investigated on the jack side :  b/32536744
-    //
-    // delegate.invokeExact(false, 'h', (short) 56, 72, Integer.MAX_VALUE + 42l,
-    //    0.56f, 100.0d, "hello", (Object) "goodbye");
-
-    Object obj = "goodbye";
-    delegate.invokeExact(false, 'h', (short) 56, 72, Integer.MAX_VALUE + 42l,
-        0.56f, 100.0d, "hello", obj);
-
-    // Test a non exact invoke with one int -> long conversion and a float -> double
-    // conversion.
-    delegate.invoke(false, 'h', (short) 56, 72, 73,
-        0.56f, 100.0f, "hello", "goodbye");
-
-    // Should throw a WrongMethodTypeException if the types don't align.
-    try {
-      delegate.invoke(false);
-      throw new AssertionError("Call to invoke unexpectedly succeeded");
-    } catch (WrongMethodTypeException expected) {
-    }
-
-    // Test return values.
-
-    // boolean.
-    MethodHandle returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnBoolean", MethodType.methodType(boolean.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((boolean) delegate.invoke());
-    System.out.println((boolean) delegate.invokeExact());
-
-    // char.
-    returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnChar", MethodType.methodType(char.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((char) delegate.invoke());
-    System.out.println((char) delegate.invokeExact());
-
-    // int.
-    returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnInt", MethodType.methodType(int.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((int) delegate.invoke());
-    System.out.println((int) delegate.invokeExact());
-
-    // long.
-    returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnLong", MethodType.methodType(long.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((long) delegate.invoke());
-    System.out.println((long) delegate.invokeExact());
-
-    // float.
-    returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnFloat", MethodType.methodType(float.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((float) delegate.invoke());
-    System.out.println((float) delegate.invokeExact());
-
-    // double.
-    returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnDouble", MethodType.methodType(double.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((double) delegate.invoke());
-    System.out.println((double) delegate.invokeExact());
-
-    // references.
-    returner = MethodHandles.lookup().findStatic(
-        Main.class, "testDelegate_returnString", MethodType.methodType(String.class));
-    delegate = new DelegatingTransformer(returner);
-
-    System.out.println((String) delegate.invoke());
-    System.out.println((String) delegate.invokeExact());
+    testDropArguments();
+    testCatchException();
+    testGuardWithTest();
+    testArrayElementGetter();
+    testArrayElementSetter();
+    testIdentity();
+    testConstant();
+    testBindTo();
+    testFilterReturnValue();
+    testPermuteArguments();
   }
 
   public static void testThrowException() throws Throwable {
@@ -190,12 +44,864 @@
           " [ " + handle.type() + "]");
     }
 
+    final IllegalArgumentException iae = new IllegalArgumentException("boo!");
     try {
-      handle.invoke();
+      handle.invoke(iae);
       System.out.println("Expected an exception of type: java.lang.IllegalArgumentException");
     } catch (IllegalArgumentException expected) {
+      if (expected != iae) {
+        System.out.println("Wrong exception: expected " + iae + " but was " + expected);
+      }
+    }
+  }
+
+  public static void dropArguments_delegate(String message, long message2) {  // Looked up by name in testDropArguments.
+    System.out.println("Message: " + message + ", Message2: " + message2);  // Emits one stdout line per call.
+  }
+
+  public static void testDropArguments() throws Throwable {  // Exercises MethodHandles.dropArguments.
+    MethodHandle delegate = MethodHandles.lookup().findStatic(Main.class,
+        "dropArguments_delegate",
+        MethodType.methodType(void.class, new Class<?>[] { String.class, long.class }));
+    // Insert two dropped parameters (int, Object) in front of the delegate's (String, long).
+    MethodHandle transform = MethodHandles.dropArguments(delegate, 0, int.class, Object.class);
+
+    // The transformer now takes two additional arguments at position zero, so an exact
+    try {  // call that passes only the delegate's own arguments must be rejected.
+      transform.invokeExact("foo", 42l);
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+    // With the extra leading arguments supplied, both invocation modes reach the delegate.
+    transform.invokeExact(45, new Object(), "foo", 42l);
+    transform.invoke(45, new Object(), "foo", 42l);
+
+    // Additional arguments at position 1.
+    transform = MethodHandles.dropArguments(delegate, 1, int.class, Object.class);
+    transform.invokeExact("foo", 45, new Object(), 42l);
+    transform.invoke("foo", 45, new Object(), 42l);
+
+    // Additional arguments at position 2.
+    transform = MethodHandles.dropArguments(delegate, 2, int.class, Object.class);
+    transform.invokeExact("foo", 42l, 45, new Object());
+    transform.invoke("foo", 42l, 45, new Object());
+
+    // Note that we still perform argument conversions even for the arguments that
+    // are subsequently dropped.
+    try {
+      transform.invoke("foo", 42l, 45l, new Object());  // 45l is a long passed where the dropped int is declared.
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    } catch (IllegalArgumentException expected) {
+      // TODO(narayan): We currently throw the wrong type of exception here,
+      // it's IAE and should be WMTE instead.
+    }
+
+    // Check that asType works as expected.
+    transform = MethodHandles.dropArguments(delegate, 0, int.class, Object.class);
+    transform = transform.asType(MethodType.methodType(void.class,
+          new Class<?>[] { short.class, Object.class, String.class, long.class }));
+    transform.invokeExact((short) 45, new Object(), "foo", 42l);
+
+    // Invalid argument location (negative position), should not be allowed.
+    try {
+      MethodHandles.dropArguments(delegate, -1, int.class, Object.class);
+      fail();
+    } catch (IllegalArgumentException expected) {
+    }
+
+    // Invalid argument location (3 is past the delegate's two parameters), should not be allowed.
+    try {
+      MethodHandles.dropArguments(delegate, 3, int.class, Object.class);
+      fail();
+    } catch (IllegalArgumentException expected) {
+    }
+    // void.class as the type of a dropped argument is expected to be rejected as well.
+    try {
+      MethodHandles.dropArguments(delegate, 1, void.class);
+      fail();
+    } catch (IllegalArgumentException expected) {
     }
   }
+
+  public static String testCatchException_target(String arg1, long arg2, String exceptionMessage)
+      throws Throwable {  // Target handle used by testCatchException.
+    if (exceptionMessage != null) {  // A non-null message makes the target throw before printing.
+      throw new IllegalArgumentException(exceptionMessage);
+    }
+    // Normal path: log the first two arguments and return the marker string "target".
+    System.out.println("Target: Arg1: " + arg1 + ", Arg2: " + arg2);
+    return "target";
+  }
+
+  public static String testCatchException_handler(IllegalArgumentException iae, String arg1, long arg2,
+      String exMsg) {  // Handler taking the exception followed by all three target arguments.
+    System.out.println("Handler: " + iae + ", Arg1: " + arg1 + ", Arg2: " + arg2 + ", ExMsg: " + exMsg);
+    return "handler1";  // Marker string distinguishing this handler from handler2.
+  }
+
+  public static String testCatchException_handler2(IllegalArgumentException iae, String arg1) {  // Handler taking only the first target argument.
+    System.out.println("Handler: " + iae + ", Arg1: " + arg1);
+    return "handler2";  // Marker string distinguishing this handler from the four-argument handler.
+  }
+
+  public static void testCatchException() throws Throwable {  // Exercises MethodHandles.catchException.
+    MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+        "testCatchException_target",
+        MethodType.methodType(String.class, new Class<?>[] { String.class, long.class, String.class }));
+    // First handler: receives the exception plus every argument of the target.
+    MethodHandle handler = MethodHandles.lookup().findStatic(Main.class,
+        "testCatchException_handler",
+        MethodType.methodType(String.class, new Class<?>[] { IllegalArgumentException.class,
+            String.class, long.class, String.class }));
+
+    MethodHandle adapter = MethodHandles.catchException(target, IllegalArgumentException.class,
+        handler);
+
+    String returnVal = null;
+
+    // These two should end up calling the target always. We're passing a null exception
+    // message here, which means the target will not throw.
+    returnVal = (String) adapter.invoke("foo", 42, null);
+    assertEquals("target", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, (String) null);
+    assertEquals("target", returnVal);
+
+    // We're passing a non-null exception message here, which means the target will throw,
+    // which in turn means that the handler must be called for the next two invokes.
+    returnVal = (String) adapter.invoke("foo", 42, "exceptionMessage");
+    assertEquals("handler1", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage");
+    assertEquals("handler1", returnVal);
+    // Second handler: declares only the exception and the first target argument.
+    handler = MethodHandles.lookup().findStatic(Main.class,
+        "testCatchException_handler2",
+        MethodType.methodType(String.class, new Class<?>[] { IllegalArgumentException.class,
+            String.class }));
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class, handler);
+
+    returnVal = (String) adapter.invoke("foo", 42, "exceptionMessage");
+    assertEquals("handler2", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage");
+    assertEquals("handler2", returnVal);
+
+    // The handler need not be a static method of this class. Here it is the virtual
+    // method IllegalArgumentException.toString(), invoked on the exception that was
+    // thrown by the target; it takes none of the target's arguments.
+    handler = MethodHandles.lookup().findVirtual(IllegalArgumentException.class,
+        "toString", MethodType.methodType(String.class));
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class, handler);
+
+    returnVal = (String) adapter.invoke("foo", 42, "exceptionMessage");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage2");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage2", returnVal);
+
+    // Check that asType works as expected: the long parameter is retyped to int.
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class,
+        handler);
+    adapter = adapter.asType(MethodType.methodType(String.class,
+          new Class<?>[] { String.class, int.class, String.class }));
+    returnVal = (String) adapter.invokeExact("foo", 42, "exceptionMessage");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage", returnVal);
+  }
+
+  public static boolean testGuardWithTest_test(String arg1, long arg2) {  // Predicate handle used by testGuardWithTest.
+    return "target".equals(arg1) && 42 == arg2;  // True only for exactly ("target", 42).
+  }
+
+  public static String testGuardWithTest_target(String arg1, long arg2, int arg3) {  // Branch looked up as the guard's target.
+    System.out.println("target: " + arg1 + ", " + arg2  + ", " + arg3);
+    return "target";  // Marker string the test asserts on.
+  }
+
+  public static String testGuardWithTest_fallback(String arg1, long arg2, int arg3) {  // Branch looked up as the guard's fallback.
+    System.out.println("fallback: " + arg1 + ", " + arg2  + ", " + arg3);
+    return "fallback";  // Marker string the test asserts on.
+  }
+
+  public static void testGuardWithTest() throws Throwable {  // Exercises MethodHandles.guardWithTest.
+    MethodHandle test = MethodHandles.lookup().findStatic(Main.class,
+        "testGuardWithTest_test",
+        MethodType.methodType(boolean.class, new Class<?>[] { String.class, long.class }));
+    // Target and fallback share one type; the test handle takes only its first two parameters.
+    final MethodType type = MethodType.methodType(String.class,
+        new Class<?>[] { String.class, long.class, int.class });
+
+    final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+        "testGuardWithTest_target", type);
+    final MethodHandle fallback = MethodHandles.lookup().findStatic(Main.class,
+        "testGuardWithTest_fallback", type);
+
+    MethodHandle adapter = MethodHandles.guardWithTest(test, target, fallback);
+
+    String returnVal = null;
+    // ("target", 42) satisfies the predicate, so the target branch runs.
+    returnVal = (String) adapter.invoke("target", 42, 56);
+    assertEquals("target", returnVal);
+    returnVal = (String) adapter.invokeExact("target", 42l, 56);
+    assertEquals("target", returnVal);
+    // A first argument other than "target" fails the predicate, selecting the fallback.
+    returnVal = (String) adapter.invoke("fallback", 42l, 56);
+    assertEquals("fallback", returnVal);
+    returnVal = (String) adapter.invokeExact("target", 42l, 56);
+    assertEquals("target", returnVal);
+
+    // Check that asType works as expected: the long parameter is retyped to int.
+    adapter = adapter.asType(MethodType.methodType(String.class,
+          new Class<?>[] { String.class, int.class, int.class }));
+    returnVal = (String) adapter.invokeExact("target", 42, 56);
+    assertEquals("target", returnVal);
+  }
+
+  public static void testArrayElementGetter() throws Throwable {  // Exercises MethodHandles.arrayElementGetter.
+    MethodHandle getter = MethodHandles.arrayElementGetter(int[].class);
+    // Value mismatches below are reported on stdout rather than thrown.
+    {  // int[]: a normal read plus the two failure modes.
+      int[] array = new int[1];
+      array[0] = 42;
+      int value = (int) getter.invoke(array, 0);
+      if (value != 42) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      try {
+        value = (int) getter.invoke(array, -1);  // Negative index.
+        fail();
+      } catch (ArrayIndexOutOfBoundsException expected) {
+      }
+
+      try {
+        value = (int) getter.invoke(null, -1);  // Null array; NullPointerException is the expected failure.
+        fail();
+      } catch (NullPointerException expected) {
+      }
+    }
+
+    {  // long[]
+      getter = MethodHandles.arrayElementGetter(long[].class);
+      long[] array = new long[1];
+      array[0] = 42;
+      long value = (long) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // short[]
+      getter = MethodHandles.arrayElementGetter(short[].class);
+      short[] array = new short[1];
+      array[0] = 42;
+      short value = (short) getter.invoke(array, 0);
+      if (value != 42l) {  // The short is widened for the comparison with the long literal.
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // char[]
+      getter = MethodHandles.arrayElementGetter(char[].class);
+      char[] array = new char[1];
+      array[0] = 42;
+      char value = (char) getter.invoke(array, 0);
+      if (value != 42l) {  // The char is widened for the comparison with the long literal.
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // byte[]
+      getter = MethodHandles.arrayElementGetter(byte[].class);
+      byte[] array = new byte[1];
+      array[0] = (byte) 0x8;
+      byte value = (byte) getter.invoke(array, 0);
+      if (value != (byte) 0x8) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // boolean[]
+      getter = MethodHandles.arrayElementGetter(boolean[].class);
+      boolean[] array = new boolean[1];
+      array[0] = true;
+      boolean value = (boolean) getter.invoke(array, 0);
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // float[]
+      getter = MethodHandles.arrayElementGetter(float[].class);
+      float[] array = new float[1];
+      array[0] = 42.0f;
+      float value = (float) getter.invoke(array, 0);
+      if (value != 42.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // double[]
+      getter = MethodHandles.arrayElementGetter(double[].class);
+      double[] array = new double[1];
+      array[0] = 42.0;
+      double value = (double) getter.invoke(array, 0);
+      if (value != 42.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // Reference array (String[]): every index of a three-element array is read back.
+      getter = MethodHandles.arrayElementGetter(String[].class);
+      String[] array = new String[3];
+      array[0] = "42";
+      array[1] = "48";
+      array[2] = "54";
+      String value = (String) getter.invoke(array, 0);
+      assertEquals("42", value);
+      value = (String) getter.invoke(array, 1);
+      assertEquals("48", value);
+      value = (String) getter.invoke(array, 2);
+      assertEquals("54", value);
+    }
+  }
+
+  public static void testArrayElementSetter() throws Throwable {  // Exercises MethodHandles.arrayElementSetter.
+    MethodHandle setter = MethodHandles.arrayElementSetter(int[].class);
+    // Value mismatches below are reported on stdout rather than thrown.
+    {  // int[]: two normal writes plus the two failure modes.
+      int[] array = new int[2];
+      setter.invoke(array, 0, 42);
+      setter.invoke(array, 1, 43);
+
+      if (array[0] != 42) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+      if (array[1] != 43) {
+        System.out.println("Unexpected value: " + array[1]);
+      }
+
+      try {
+        setter.invoke(array, -1, 42);  // Negative index.
+        fail();
+      } catch (ArrayIndexOutOfBoundsException expected) {
+      }
+
+      try {
+        setter.invoke(null, 0, 42);  // Null array.
+        fail();
+      } catch (NullPointerException expected) {
+      }
+    }
+
+    {  // long[]
+      setter = MethodHandles.arrayElementSetter(long[].class);
+      long[] array = new long[1];
+      setter.invoke(array, 0, 42l);
+      if (array[0] != 42l) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // short[]
+      setter = MethodHandles.arrayElementSetter(short[].class);
+      short[] array = new short[1];
+      setter.invoke(array, 0, (short) 42);
+      if (array[0] != 42l) {  // The short is widened for the comparison with the long literal.
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // char[]
+      setter = MethodHandles.arrayElementSetter(char[].class);
+      char[] array = new char[1];
+      setter.invoke(array, 0, (char) 42);
+      if (array[0] != 42) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // byte[]
+      setter = MethodHandles.arrayElementSetter(byte[].class);
+      byte[] array = new byte[1];
+      setter.invoke(array, 0, (byte) 0x8);
+      if (array[0] != (byte) 0x8) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // boolean[]
+      setter = MethodHandles.arrayElementSetter(boolean[].class);
+      boolean[] array = new boolean[1];
+      setter.invoke(array, 0, true);
+      if (!array[0]) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // float[]
+      setter = MethodHandles.arrayElementSetter(float[].class);
+      float[] array = new float[1];
+      setter.invoke(array, 0, 42.0f);
+      if (array[0] != 42.0f) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // double[]
+      setter = MethodHandles.arrayElementSetter(double[].class);
+      double[] array = new double[1];
+      setter.invoke(array, 0, 42.0);
+      if (array[0] != 42.0) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {  // Reference array (String[]): every index of a three-element array is written.
+      setter = MethodHandles.arrayElementSetter(String[].class);
+      String[] array = new String[3];
+      setter.invoke(array, 0, "42");
+      setter.invoke(array, 1, "48");
+      setter.invoke(array, 2, "54");
+      assertEquals("42", array[0]);
+      assertEquals("48", array[1]);
+      assertEquals("54", array[2]);
+    }
+  }
+
+  public static void testIdentity() throws Throwable {  // Exercises MethodHandles.identity for each primitive type and String.
+    {  // boolean; mismatches in the primitive cases are reported on stdout rather than thrown.
+      MethodHandle identity = MethodHandles.identity(boolean.class);
+      boolean value = (boolean) identity.invoke(false);
+      if (value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // byte
+      MethodHandle identity = MethodHandles.identity(byte.class);
+      byte value = (byte) identity.invoke((byte) 0x8);
+      if (value != (byte) 0x8) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // char; (char) -56 is a narrowing cast of a negative int constant.
+      MethodHandle identity = MethodHandles.identity(char.class);
+      char value = (char) identity.invoke((char) -56);
+      if (value != (char) -56) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // short, with a negative value.
+      MethodHandle identity = MethodHandles.identity(short.class);
+      short value = (short) identity.invoke((short) -59);
+      if (value != (short) -59) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // int
+      MethodHandle identity = MethodHandles.identity(int.class);
+      int value = (int) identity.invoke(52);
+      if (value != 52) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // long, with a negative value.
+      MethodHandle identity = MethodHandles.identity(long.class);
+      long value = (long) identity.invoke(-76l);
+      if (value != (long) -76) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // float
+      MethodHandle identity = MethodHandles.identity(float.class);
+      float value = (float) identity.invoke(56.0f);
+      if (value != (float) 56.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // double
+      MethodHandle identity = MethodHandles.identity(double.class);
+      double value = (double) identity.invoke((double) 72.0);
+      if (value != (double) 72.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {  // Reference type (String), checked with assertEquals.
+      MethodHandle identity = MethodHandles.identity(String.class);
+      String value = (String) identity.invoke("bazman");
+      assertEquals("bazman", value);
+    }
+  }
+
+  public static void testConstant() throws Throwable {  // Exercises MethodHandles.constant.
+    // int constants. Value mismatches are reported on stdout rather than thrown.
+    {
+      MethodHandle constant = MethodHandles.constant(int.class, 56);
+      int value = (int) constant.invoke();
+      if (value != 56) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // short constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (short) 52);
+      value = (int) constant.invoke();
+      if (value != 52) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // char constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (char) 'b');
+      value = (int) constant.invoke();
+      if (value != (int) 'b') {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // byte constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (byte) 0x1);
+      value = (int) constant.invoke();
+      if (value != 1) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // boolean, float, double and long primitive constants are not convertible
+      // to int, so the handle creation must fail with a CCE.
+      try {
+        MethodHandles.constant(int.class, false);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 0.1f);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 0.2);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 73l);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+    }
+
+    // long constants.
+    {
+      MethodHandle constant = MethodHandles.constant(long.class, 56l);
+      long value = (long) constant.invoke();
+      if (value != 56l) {
+        System.out.println("Unexpected value: " + value);
+      }
+      // An int constant value is accepted for a long-typed handle.
+      constant = MethodHandles.constant(long.class, (int) 56);
+      value = (long) constant.invoke();
+      if (value != 56l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // byte constants.
+    {
+      MethodHandle constant = MethodHandles.constant(byte.class, (byte) 0x12);
+      byte value = (byte) constant.invoke();
+      if (value != (byte) 0x12) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // boolean constants.
+    {
+      MethodHandle constant = MethodHandles.constant(boolean.class, true);
+      boolean value = (boolean) constant.invoke();
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // char constants.
+    {
+      MethodHandle constant = MethodHandles.constant(char.class, 'f');
+      char value = (char) constant.invoke();
+      if (value != 'f') {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // short constants.
+    {
+      MethodHandle constant = MethodHandles.constant(short.class, (short) 123);
+      short value = (short) constant.invoke();
+      if (value != (short) 123) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // float constants.
+    {
+      MethodHandle constant = MethodHandles.constant(float.class, 56.0f);
+      float value = (float) constant.invoke();
+      if (value != 56.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // double constants.
+    {
+      MethodHandle constant = MethodHandles.constant(double.class, 256.0);
+      double value = (double) constant.invoke();
+      if (value != 256.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // reference constants.
+    {
+      MethodHandle constant = MethodHandles.constant(String.class, "256.0");
+      String value = (String) constant.invoke();
+      assertEquals("256.0", value);
+    }
+  }
+
+  public static void testBindTo() throws Throwable {  // Exercises MethodHandle.bindTo.
+    MethodHandle stringCharAt = MethodHandles.lookup().findVirtual(
+        String.class, "charAt", MethodType.methodType(char.class, int.class));
+    // Baseline: the unbound handle takes the receiver as its first argument.
+    char value = (char) stringCharAt.invoke("foo", 0);
+    if (value != 'f') {
+      System.out.println("Unexpected value: " + value);
+    }
+    // Bound to a receiver, only the index remains to be passed.
+    MethodHandle bound = stringCharAt.bindTo("foo");
+    value = (char) bound.invoke(0);
+    if (value != 'f') {
+      System.out.println("Unexpected value: " + value);
+    }
+    // Binding a receiver that is not a String is expected to fail at bind time.
+    try {
+      stringCharAt.bindTo(new Object());
+      fail();
+    } catch (ClassCastException expected) {
+    }
+    // Binding null is expected to succeed; the failure shows up only on invocation.
+    bound = stringCharAt.bindTo(null);
+    try {
+      bound.invoke(0);
+      fail();
+    } catch (NullPointerException expected) {
+    }
+    // bindTo on a static method handle binds its first (String) argument.
+    MethodHandle integerParseInt = MethodHandles.lookup().findStatic(
+        Integer.class, "parseInt", MethodType.methodType(int.class, String.class));
+
+    bound = integerParseInt.bindTo("78452");
+    int intValue = (int) bound.invoke();
+    if (intValue != 78452) {
+      System.out.println("Unexpected value: " + intValue);
+    }
+  }
+
+  public static String filterReturnValue_target(int a) {  // Reference-returning target for testFilterReturnValue.
+    return "ReturnValue" + a;  // The decimal form of a is appended to a fixed prefix.
+  }
+
+  public static boolean filterReturnValue_filter(String value) {  // Filter applied to the String-returning target.
+    return value.indexOf("42") != -1;  // True when the string contains "42" anywhere.
+  }
+
+  public static int filterReturnValue_intTarget(String a) {  // Primitive-returning target for testFilterReturnValue.
+    return Integer.parseInt(a);  // Parses the argument as a decimal int.
+  }
+
+  public static int filterReturnValue_intFilter(int b) {  // Filter applied to the int-returning target.
+    return b + 1;  // Increments the value so the test can tell the filter ran.
+  }
+
+  public static void filterReturnValue_voidTarget() {  // Void target for testFilterReturnValue; intentionally empty.
+  }
+
+  public static int filterReturnValue_voidFilter() {  // No-argument filter paired with the void target.
+    return 42;  // Fixed value the test checks for.
+  }
+
+  public static void testFilterReturnValue() throws Throwable {  // Exercises MethodHandles.filterReturnValue.
+    // A target that returns a reference.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_target", MethodType.methodType(String.class, int.class));
+      final MethodHandle filter = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_filter", MethodType.methodType(boolean.class, String.class));
+
+      MethodHandle adapter = MethodHandles.filterReturnValue(target, filter);
+      // The target yields "ReturnValue42", which contains "42", so the filter returns true.
+      boolean value = (boolean) adapter.invoke((int) 42);
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+      value = (boolean) adapter.invoke((int) 43);  // "ReturnValue43" does not contain "42".
+      if (value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // A target that returns a primitive.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_intTarget", MethodType.methodType(int.class, String.class));
+      final MethodHandle filter = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_intFilter", MethodType.methodType(int.class, int.class));
+
+      MethodHandle adapter = MethodHandles.filterReturnValue(target, filter);
+      // "56" is parsed to 56 by the target and incremented to 57 by the filter.
+      int value = (int) adapter.invoke("56");
+      if (value != 57) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // A target that returns void.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_voidTarget", MethodType.methodType(void.class));
+      final MethodHandle filter = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_voidFilter", MethodType.methodType(int.class));
+
+      MethodHandle adapter = MethodHandles.filterReturnValue(target, filter);
+      // With a void target the filter takes no argument; its result is the adapter's result.
+      int value = (int) adapter.invoke();
+      if (value != 42) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+  }
+
+  public static void permuteArguments_callee(boolean a, byte b, char c,
+      short d, int e, long f, float g, double h) {
+    if (a == true && b == (byte) 'b' && c == 'c' && d == (short) 56 &&
+        e == 78 && f == (long) 97 && g == 98.0f && h == 97.0) {
+      return;
+    }
+    // Any mismatch (including the double h) means an argument did not arrive in its slot.
+    System.out.println("Unexpected arguments: " + a + ", " + b + ", " + c
+        + ", " + d + ", " + e + ", " + f + ", " + g + ", " + h);
+  }
+
+  public static void permuteArguments_boxingCallee(boolean a, Integer b) {
+    // Stays silent only for the expected (true, 42) pair; anything else is reported.
+    final boolean matches = a && b.intValue() == 42;
+    if (!matches) {
+      System.out.println("Unexpected arguments: " + a + ", " + b);
+    }
+  }
+
+  public static void testPermuteArguments() throws Throwable {
+    {  // Primitive arguments: newType lists the callee's eight parameters in reverse order.
+      final MethodHandle target = MethodHandles.lookup().findStatic(
+          Main.class, "permuteArguments_callee",
+          MethodType.methodType(void.class, new Class<?>[] {
+            boolean.class, byte.class, char.class, short.class, int.class,
+            long.class, float.class, double.class }));
+
+      final MethodType newType = MethodType.methodType(void.class, new Class<?>[] {
+        double.class, float.class, long.class, int.class, short.class, char.class,
+        byte.class, boolean.class });
+
+      final MethodHandle permutation = MethodHandles.permuteArguments(
+          target, newType, new int[] { 7, 6, 5, 4, 3, 2, 1, 0 });
+
+      permutation.invoke((double) 97.0, (float) 98.0f, (long) 97, 78,
+          (short) 56, 'c', (byte) 'b', (boolean) true);
+
+      // The permutation array was not of the right length.
+      try {
+        MethodHandles.permuteArguments(target, newType,
+            new int[] { 7 });
+        fail();
+      } catch (IllegalArgumentException expected) {
+      }
+
+      // The permutation array has an element that's out of bounds
+      // (there's no argument with idx == 8).
+      try {
+        MethodHandles.permuteArguments(target, newType,
+            new int[] { 8, 6, 5, 4, 3, 2, 1, 0 });
+        fail();
+      } catch (IllegalArgumentException expected) {
+      }
+
+      // The permutation array maps to an incorrect type (byte parameter <- boolean argument).
+      try {
+        MethodHandles.permuteArguments(target, newType,
+            new int[] { 7, 7, 5, 4, 3, 2, 1, 0 });
+        fail();
+      } catch (IllegalArgumentException expected) {
+      }
+    }
+
+    // Tests for reference arguments as well as permutations that
+    // repeat arguments.
+    {
+      final MethodHandle target = MethodHandles.lookup().findVirtual(
+          String.class, "concat", MethodType.methodType(String.class, String.class));
+
+      final MethodType newType = MethodType.methodType(String.class, String.class,
+          String.class);
+
+      assertEquals("foobar", (String) target.invoke("foo", "bar"));
+
+      MethodHandle permutation = MethodHandles.permuteArguments(target,
+          newType, new int[] { 1, 0 });
+      assertEquals("barfoo", (String) permutation.invoke("foo", "bar"));
+
+      permutation = MethodHandles.permuteArguments(target, newType, new int[] { 0, 0 });
+      assertEquals("foofoo", (String) permutation.invoke("foo", "bar"));
+
+      permutation = MethodHandles.permuteArguments(target, newType, new int[] { 1, 1 });
+      assertEquals("barbar", (String) permutation.invoke("foo", "bar"));
+    }
+
+    // Tests for boxing and unboxing: each call mixes primitive and boxed argument forms.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(
+          Main.class, "permuteArguments_boxingCallee",
+          MethodType.methodType(void.class, new Class<?>[] { boolean.class, Integer.class }));
+
+      final MethodType newType = MethodType.methodType(void.class,
+          new Class<?>[] { Integer.class, boolean.class });
+
+      MethodHandle permutation = MethodHandles.permuteArguments(target,
+          newType, new int[] { 1, 0 });
+
+      permutation.invoke(42, true);
+      permutation.invoke(42, Boolean.TRUE);
+      permutation.invoke(Integer.valueOf(42), true);
+      permutation.invoke(Integer.valueOf(42), Boolean.TRUE);
+    }
+  }
+
+  public static void fail() {  // Prints a failure marker and the current stack; does not throw.
+    System.out.println("FAIL");
+    Thread.dumpStack();
+  }
+
+  public static void assertEquals(String s1, String s2) {
+    // Identical references (including two nulls) count as equal.
+    final boolean sameReference = (s1 == s2);
+
+    // Otherwise both strings must be non-null and have equal contents.
+    final boolean sameContents = s1 != null && s2 != null && s1.equals(s2);
+
+    if (!sameReference && !sameContents) {
+      throw new AssertionError("assertEquals s1: " + s1 + ", s2: " + s2);
+    }
+  }
 }
-
-
diff --git a/test/979-invoke-polymorphic-accessors/build b/test/958-methodhandle-emulated-stackframe/build
old mode 100644
new mode 100755
similarity index 100%
copy from test/979-invoke-polymorphic-accessors/build
copy to test/958-methodhandle-emulated-stackframe/build
diff --git a/test/958-methodhandle-emulated-stackframe/expected.txt b/test/958-methodhandle-emulated-stackframe/expected.txt
new file mode 100644
index 0000000..5f38259
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/expected.txt
@@ -0,0 +1,32 @@
+boolean: false
+char: h
+short: 56
+int: 72
+long: 2147483689
+float: 0.56
+double: 100.0
+String: hello
+Object: goodbye
+boolean: false
+char: h
+short: 56
+int: 72
+long: 73
+float: 0.56
+double: 100.0
+String: hello
+Object: goodbye
+true
+true
+a
+a
+42
+42
+43
+43
+43.0
+43.0
+43.0
+43.0
+plank
+plank
diff --git a/test/958-methodhandle-emulated-stackframe/info.txt b/test/958-methodhandle-emulated-stackframe/info.txt
new file mode 100644
index 0000000..bec2324
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/info.txt
@@ -0,0 +1,5 @@
+Tests for dalvik.system.EmulatedStackFrame, which is used to implement
+MethodHandle transformations. This is a separate test because it tests
+an implementation detail and hence cannot be used with --mode=jvm.
+
+NOTE: needs to run under ART and to be built with a Java 8 language-level compiler.
diff --git a/test/979-invoke-polymorphic-accessors/run b/test/958-methodhandle-emulated-stackframe/run
old mode 100644
new mode 100755
similarity index 100%
copy from test/979-invoke-polymorphic-accessors/run
copy to test/958-methodhandle-emulated-stackframe/run
diff --git a/test/958-methodhandle-emulated-stackframe/src/Main.java b/test/958-methodhandle-emulated-stackframe/src/Main.java
new file mode 100644
index 0000000..f739d47
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/src/Main.java
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodHandles.Lookup;
+import java.lang.invoke.MethodType;
+import java.lang.invoke.WrongMethodTypeException;
+import java.lang.invoke.Transformers.Transformer;
+
+import dalvik.system.EmulatedStackFrame;
+
+public class Main {
+  // Static callees below serve as targets for the delegating transformer.
+  public static void testDelegate_allTypes(boolean z, char a, short b, int c, long d,
+                                           float e, double f, String g, Object h) {
+    System.out.println("boolean: " + z);
+    System.out.println("char: " + a);
+    System.out.println("short: " + b);
+    System.out.println("int: " + c);
+    System.out.println("long: " + d);
+    System.out.println("float: " + e);
+    System.out.println("double: " + f);
+    System.out.println("String: " + g);
+    System.out.println("Object: " + h);
+  }
+
+  public static boolean testDelegate_returnBoolean() {
+    return true;
+  }
+
+  public static char testDelegate_returnChar() {
+    return 'a';
+  }
+
+  public static int testDelegate_returnInt() {
+    return 42;
+  }
+
+  public static long testDelegate_returnLong() {
+    return 43;
+  }
+
+  public static float testDelegate_returnFloat() {
+    return 43.0f;
+  }
+
+  public static double testDelegate_returnDouble() {
+    return 43.0;
+  }
+
+  public static String testDelegate_returnString() {
+    return "plank";
+  }
+  // Transformer whose transform() passes the EmulatedStackFrame straight to the wrapped handle.
+  public static class DelegatingTransformer extends Transformer {
+    private final MethodHandle delegate;
+
+    public DelegatingTransformer(MethodHandle delegate) {
+      super(delegate.type());
+      this.delegate = delegate;
+    }
+
+    @Override
+    public void transform(EmulatedStackFrame stackFrame) throws Throwable {
+      delegate.invoke(stackFrame);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    MethodHandle specialFunctionHandle = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_allTypes", MethodType.methodType(void.class,
+          new Class<?>[] { boolean.class, char.class, short.class, int.class, long.class,
+            float.class, double.class, String.class, Object.class }));
+
+    DelegatingTransformer delegate = new DelegatingTransformer(specialFunctionHandle);
+
+    // Test an exact invoke.
+    //
+    // Note that the shorter form below doesn't work and must be
+    // investigated on the jack side :  b/32536744
+    //
+    // delegate.invokeExact(false, 'h', (short) 56, 72, Integer.MAX_VALUE + 42l,
+    //    0.56f, 100.0d, "hello", (Object) "goodbye");
+
+    Object obj = "goodbye";
+    delegate.invokeExact(false, 'h', (short) 56, 72, Integer.MAX_VALUE + 42l,
+        0.56f, 100.0d, "hello", obj);
+
+    // Test a non exact invoke with one int -> long conversion and a float -> double
+    // conversion.
+    delegate.invoke(false, 'h', (short) 56, 72, 73,
+        0.56f, 100.0f, "hello", "goodbye");
+
+    // Should throw a WrongMethodTypeException if the types don't align.
+    try {
+      delegate.invoke(false);
+      throw new AssertionError("Call to invoke unexpectedly succeeded");
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Test return values.
+    // Every case calls the wrapped handle through both invoke() and invokeExact().
+    // boolean.
+    MethodHandle returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnBoolean", MethodType.methodType(boolean.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((boolean) delegate.invoke());
+    System.out.println((boolean) delegate.invokeExact());
+
+    // char.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnChar", MethodType.methodType(char.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((char) delegate.invoke());
+    System.out.println((char) delegate.invokeExact());
+
+    // int.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnInt", MethodType.methodType(int.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((int) delegate.invoke());
+    System.out.println((int) delegate.invokeExact());
+
+    // long.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnLong", MethodType.methodType(long.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((long) delegate.invoke());
+    System.out.println((long) delegate.invokeExact());
+
+    // float.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnFloat", MethodType.methodType(float.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((float) delegate.invoke());
+    System.out.println((float) delegate.invokeExact());
+
+    // double.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnDouble", MethodType.methodType(double.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((double) delegate.invoke());
+    System.out.println((double) delegate.invokeExact());
+
+    // references.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnString", MethodType.methodType(String.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((String) delegate.invoke());
+    System.out.println((String) delegate.invokeExact());
+  }
+}
+
+
diff --git a/test/979-invoke-polymorphic-accessors/build b/test/959-invoke-polymorphic-accessors/build
similarity index 100%
rename from test/979-invoke-polymorphic-accessors/build
rename to test/959-invoke-polymorphic-accessors/build
diff --git a/test/959-invoke-polymorphic-accessors/expected.txt b/test/959-invoke-polymorphic-accessors/expected.txt
new file mode 100644
index 0000000..de2916b
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/expected.txt
@@ -0,0 +1,4 @@
+1515870810
+Passed MethodHandles.Lookup tests for accessors.
+Passed MethodHandle.invokeExact() tests for accessors.
+Passed MethodHandle.invoke() tests for accessors.
diff --git a/test/979-invoke-polymorphic-accessors/info.txt b/test/959-invoke-polymorphic-accessors/info.txt
similarity index 100%
rename from test/979-invoke-polymorphic-accessors/info.txt
rename to test/959-invoke-polymorphic-accessors/info.txt
diff --git a/test/979-invoke-polymorphic-accessors/run b/test/959-invoke-polymorphic-accessors/run
similarity index 100%
rename from test/979-invoke-polymorphic-accessors/run
rename to test/959-invoke-polymorphic-accessors/run
diff --git a/test/979-invoke-polymorphic-accessors/src/Main.java b/test/959-invoke-polymorphic-accessors/src/Main.java
similarity index 79%
rename from test/979-invoke-polymorphic-accessors/src/Main.java
rename to test/959-invoke-polymorphic-accessors/src/Main.java
index 6cdcd10..b7ecf8e 100644
--- a/test/979-invoke-polymorphic-accessors/src/Main.java
+++ b/test/959-invoke-polymorphic-accessors/src/Main.java
@@ -44,7 +44,26 @@
         public static final int s_fi = 0x5a5a5a5a;
     }
 
-    public static class InvokeExactTester {
+    public static class Tester {  // Assertion helpers shared by the tester classes.
+        public static void assertActualAndExpectedMatch(boolean actual, boolean expected)
+                throws AssertionError {
+            if (actual == expected) { return; }
+            // Both values go into the message so a mismatch is self-describing.
+            throw new AssertionError("Actual != Expected (" + actual + " != " + expected + ")");
+        }
+
+        public static void assertTrue(boolean value) throws AssertionError {
+            if (value) { return; }
+            // Fixed message: the failing expression itself is not reported.
+            throw new AssertionError("Value is not true");
+        }
+
+        public static void unreachable() throws Throwable {  // Marks code that must not run.
+            throw new Error("unreachable");
+        }
+    }
+
+    public static class InvokeExactTester extends Tester {
         private enum PrimitiveType {
             Boolean,
             Byte,
@@ -64,19 +83,6 @@
             SGET,
         }
 
-        private static void assertActualAndExpectedMatch(boolean actual, boolean expected)
-                throws AssertionError {
-            if (actual != expected) {
-                throw new AssertionError("Actual != Expected (" + actual + " != " + expected + ")");
-            }
-        }
-
-        private static void assertTrue(boolean value) throws AssertionError {
-            if (!value) {
-                throw new AssertionError("Value is not true");
-            }
-        }
-
         static void setByte(MethodHandle m, ValueHolder v, byte value, boolean expectFailure)
                 throws Throwable {
             boolean exceptionThrown = false;
@@ -677,16 +683,26 @@
                 assertTrue(s.equals(ValueHolder.s_l));
             }
 
-            System.out.println("Passed InvokeExact tests for accessors.");
+            System.out.println("Passed MethodHandle.invokeExact() tests for accessors.");
         }
     }
 
-    public static class FindAccessorTester {
+    public static class FindAccessorTester extends Tester {
         public static void main() throws Throwable {
-            ValueHolder valueHolder = new ValueHolder();
+            // NB having a static field test here is essential for
+            // this test. MethodHandles need to ensure the class
+            // (ValueHolder) is initialized. This happens in the
+            // invoke-polymorphic dispatch.
             MethodHandles.Lookup lookup = MethodHandles.lookup();
-
-            lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+            try {
+                MethodHandle mh = lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+                int initialValue = (int)mh.invokeExact();
+                System.out.println(initialValue);
+            } catch (NoSuchFieldException e) { unreachable(); }
+            try {
+                MethodHandle mh = lookup.findStaticSetter(ValueHolder.class, "s_i", int.class);
+                mh.invokeExact(0);
+            } catch (NoSuchFieldException e) { unreachable(); }
             try {
                 lookup.findStaticGetter(ValueHolder.class, "s_fi", byte.class);
                 unreachable();
@@ -713,15 +729,191 @@
                 lookup.findSetter(ValueHolder.class, "m_fi", int.class);
                 unreachable();
             } catch (IllegalAccessException e) {}
+
+            System.out.println("Passed MethodHandles.Lookup tests for accessors.");
+        }
+    }
+
+    public static class InvokeTester extends Tester {
+        private static void testStaticGetter() throws Throwable {  // Static int getter.
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+            h0.invoke();  // Result ignored.
+            Number t = (Number)h0.invoke();  // These return conversions are expected to succeed.
+            int u = (int)h0.invoke();
+            Integer v = (Integer)h0.invoke();
+            long w = (long)h0.invoke();
+            try {  // The remaining return types are expected to be rejected.
+                byte x = (byte)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String y = (String)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Long z = (Long)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
         }
 
-        public static void unreachable() throws Throwable{
-            throw new Error("unreachable");
+        private static void testMemberGetter() throws Throwable {  // Instance int getter.
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findGetter(ValueHolder.class, "m_fi", int.class);
+            h0.invoke(valueHolder);  // Result ignored.
+            Number t = (Number)h0.invoke(valueHolder);  // These conversions are expected to succeed.
+            int u = (int)h0.invoke(valueHolder);
+            Integer v = (Integer)h0.invoke(valueHolder);
+            long w = (long)h0.invoke(valueHolder);
+            try {  // The remaining return types are expected to be rejected.
+                byte x = (byte)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String y = (String)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Long z = (Long)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        /*package*/ static Number getDoubleAsNumber() {  // Declared Number, actually a Double.
+            return Double.valueOf(1.4e77);
+        }
+        /*package*/ static Number getFloatAsNumber() {  // Declared Number, actually a Float.
+            return Float.valueOf((float) 7.77);
+        }
+        /*package*/ static Object getFloatAsObject() {  // Declared Object, actually a Float.
+            return Float.valueOf((float) -7.77);
+        }
+
+        private static void testMemberSetter() throws Throwable {  // Instance float setter.
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findSetter(ValueHolder.class, "m_f", float.class);
+            h0.invoke(valueHolder, 0.22f);
+            h0.invoke(valueHolder, new Float(1.11f));
+            Number floatNumber = getFloatAsNumber();
+            h0.invoke(valueHolder, floatNumber);
+            assertTrue(valueHolder.m_f == floatNumber.floatValue());
+            Object objNumber = getFloatAsObject();
+            h0.invoke(valueHolder, objNumber);
+            assertTrue(valueHolder.m_f == ((Float) objNumber).floatValue());
+            try {
+              h0.invoke(valueHolder, (Float)null);
+              unreachable();
+            } catch (NullPointerException e) {}
+            // Integral primitives are expected to be accepted for the float field.
+            h0.invoke(valueHolder, (byte)1);
+            h0.invoke(valueHolder, (short)2);
+            h0.invoke(valueHolder, 3);
+            h0.invoke(valueHolder, 4l);
+            // Requesting a value from the void setter is expected to yield null / zero.
+            assertTrue(null == (Object) h0.invoke(valueHolder, 33));
+            assertTrue(0.0f == (float) h0.invoke(valueHolder, 33));
+            assertTrue(0l == (long) h0.invoke(valueHolder, 33));
+
+            try {
+                h0.invoke(valueHolder, 0.33);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Number doubleNumber = getDoubleAsNumber();
+                h0.invoke(valueHolder, doubleNumber);
+                unreachable();
+            } catch (ClassCastException e) {}
+            try {
+                Number doubleNumber = null;
+                h0.invoke(valueHolder, doubleNumber);
+                unreachable();
+            } catch (NullPointerException e) {}
+            try {
+                // Mismatched return type - float != void
+                float tmp = (float)h0.invoke(valueHolder, 0.45f);
+                assertTrue(tmp == 0.0);
+            } catch (Exception e) { unreachable(); }
+            try {
+                h0.invoke(valueHolder, "bam");
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String s = null;
+                h0.invoke(valueHolder, s);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        private static void testStaticSetter() throws Throwable {  // Static float setter.
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findStaticSetter(ValueHolder.class, "s_f", float.class);
+            h0.invoke(0.22f);
+            h0.invoke(new Float(1.11f));
+            Number floatNumber = new Float(0.88f);
+            h0.invoke(floatNumber);
+            assertTrue(ValueHolder.s_f == floatNumber.floatValue());
+
+            try {
+              h0.invoke((Float)null);
+              unreachable();
+            } catch (NullPointerException e) {}
+            // Integral primitives are expected to be accepted for the float field.
+            h0.invoke((byte)1);
+            h0.invoke((short)2);
+            h0.invoke(3);
+            h0.invoke(4l);
+            // Requesting a value from the void setter is expected to yield null / zero.
+            assertTrue(null == (Object) h0.invoke(33));
+            assertTrue(0.0f == (float) h0.invoke(33));
+            assertTrue(0l == (long) h0.invoke(33));
+
+            try {
+                h0.invoke(0.33);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Number doubleNumber = getDoubleAsNumber();
+                h0.invoke(doubleNumber);
+                unreachable();
+            } catch (ClassCastException e) {}
+            try {
+                Number doubleNumber = new Double(1.01);
+                doubleNumber = (doubleNumber.doubleValue() != 0.1) ? null : doubleNumber;
+                h0.invoke(doubleNumber);  // doubleNumber is always null here (1.01 != 0.1).
+                unreachable();
+            } catch (NullPointerException e) {}
+            try {
+                // Mismatched return type - float != void
+                float tmp = (float)h0.invoke(0.45f);
+                assertTrue(tmp == 0.0);
+            } catch (Exception e) { unreachable(); }
+            try {
+                h0.invoke("bam");
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String s = null;
+                h0.invoke(s);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        public static void main() throws Throwable{  // Runs every invoke() accessor test.
+            testStaticGetter();
+            testMemberGetter();
+            testStaticSetter();
+            testMemberSetter();
+            System.out.println("Passed MethodHandle.invoke() tests for accessors.");
         }
     }
 
     public static void main(String[] args) throws Throwable {
+        // FindAccessor test should be the first test class in this
+        // file to ensure class initialization test is run.
         FindAccessorTester.main();
         InvokeExactTester.main();
+        InvokeTester.main();
     }
 }
diff --git a/test/979-invoke-polymorphic-accessors/expected.txt b/test/979-invoke-polymorphic-accessors/expected.txt
deleted file mode 100644
index 2987b6c..0000000
--- a/test/979-invoke-polymorphic-accessors/expected.txt
+++ /dev/null
@@ -1 +0,0 @@
-Passed InvokeExact tests for accessors.
diff --git a/test/Android.arm_vixl.mk b/test/Android.arm_vixl.mk
index 5f969e3..5ae961a 100644
--- a/test/Android.arm_vixl.mk
+++ b/test/Android.arm_vixl.mk
@@ -16,334 +16,6 @@
 
 # Known broken tests for the ARM VIXL backend.
 TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS := \
-  002-sleep \
-  003-omnibus-opcodes \
-  004-checker-UnsafeTest18 \
-  004-InterfaceTest \
-  004-JniTest \
-  004-NativeAllocations \
-  004-ReferenceMap \
-  004-SignalTest \
-  004-StackWalk \
-  004-ThreadStress \
-  004-UnsafeTest \
-  005-annotations \
-  006-args \
-  008-exceptions \
-  009-instanceof \
-  011-array-copy \
-  012-math \
-  015-switch \
-  019-wrong-array-type \
-  020-string \
-  021-string2 \
-  022-interface \
-  023-many-interfaces \
-  024-illegal-access \
-  025-access-controller \
-  027-arithmetic \
-  028-array-write \
-  031-class-attributes \
-  032-concrete-sub \
-  035-enum \
-  036-finalizer \
-  037-inherit \
-  042-new-instance \
-  044-proxy \
-  045-reflect-array \
-  046-reflect \
-  047-returns \
-  048-reflect-v8 \
-  049-show-object \
-  050-sync-test \
-  051-thread \
-  052-verifier-fun \
-  053-wait-some \
-  054-uncaught \
-  055-enum-performance \
-  058-enum-order \
-  059-finalizer-throw \
-  061-out-of-memory \
-  062-character-encodings \
-  063-process-manager \
-  064-field-access \
-  065-mismatched-implements \
-  066-mismatched-super \
-  067-preemptive-unpark \
-  068-classloader \
-  069-field-type \
-  070-nio-buffer \
-  071-dexfile \
-  072-precise-gc \
-  074-gc-thrash \
-  075-verification-error \
-  076-boolean-put \
-  079-phantom \
-  080-oom-throw \
-  080-oom-throw-with-finalizer \
-  081-hot-exceptions \
-  082-inline-execute \
-  083-compiler-regressions \
-  086-null-super \
-  087-gc-after-link \
-  088-monitor-verification \
-  090-loop-formation \
-  091-override-package-private-method \
-  093-serialization \
-  094-pattern \
-  096-array-copy-concurrent-gc \
-  098-ddmc \
-  099-vmdebug \
-  100-reflect2 \
-  101-fibonacci \
-  102-concurrent-gc \
-  103-string-append \
-  104-growth-limit \
-  106-exceptions2 \
-  107-int-math2 \
-  108-check-cast \
-  109-suspend-check \
-  113-multidex \
-  114-ParallelGC \
-  117-nopatchoat \
-  119-noimage-patchoat \
-  120-hashcode \
-  121-modifiers \
-  122-npe \
-  123-compiler-regressions-mt \
-  123-inline-execute2 \
-  127-checker-secondarydex \
-  129-ThreadGetId \
-  131-structural-change \
-  132-daemon-locks-shutdown \
-  133-static-invoke-super \
-  134-reg-promotion \
-  135-MirandaDispatch \
-  136-daemon-jni-shutdown \
-  137-cfi \
-  138-duplicate-classes-check2 \
-  139-register-natives \
-  140-field-packing \
-  141-class-unload \
-  142-classloader2 \
-  144-static-field-sigquit \
-  145-alloc-tracking-stress \
-  146-bad-interface \
-  150-loadlibrary \
-  151-OpenFileLimit \
-  201-built-in-except-detail-messages \
-  304-method-tracing \
-  407-arrays \
-  410-floats \
-  411-optimizing-arith-mul \
-  412-new-array \
-  413-regalloc-regression \
-  414-static-fields \
-  416-optimizing-arith-not \
-  417-optimizing-arith-div \
-  421-exceptions \
-  422-instanceof \
-  422-type-conversion \
-  423-invoke-interface \
-  424-checkcast \
-  425-invoke-super \
-  426-monitor \
-  427-bounds \
-  428-optimizing-arith-rem \
-  430-live-register-slow-path \
-  431-type-propagation \
-  432-optimizing-cmp \
-  434-invoke-direct \
-  436-rem-float \
-  437-inline \
-  438-volatile \
-  439-npe \
-  441-checker-inliner \
-  442-checker-constant-folding \
-  444-checker-nce \
-  445-checker-licm \
-  447-checker-inliner3 \
-  448-multiple-returns \
-  449-checker-bce \
-  450-checker-types \
-  451-regression-add-float \
-  451-spill-splot \
-  452-multiple-returns2 \
-  453-not-byte \
-  454-get-vreg \
-  456-baseline-array-set \
-  457-regs \
-  458-checker-instruct-simplification \
-  458-long-to-fpu \
-  459-dead-phi \
-  460-multiple-returns3 \
-  461-get-reference-vreg \
-  463-checker-boolean-simplifier \
-  466-get-live-vreg \
-  467-regalloc-pair \
-  468-checker-bool-simplif-regression \
-  469-condition-materialization \
-  471-deopt-environment \
-  472-type-propagation \
-  474-checker-boolean-input \
-  475-regression-inliner-ids \
-  477-checker-bound-type \
-  478-checker-clinit-check-pruning \
-  483-dce-block \
-  484-checker-register-hints \
-  485-checker-dce-switch \
-  486-checker-must-do-null-check \
   488-checker-inline-recursive-calls \
-  490-checker-inline \
-  491-current-method \
-  492-checker-inline-invoke-interface \
-  493-checker-inline-invoke-interface \
-  494-checker-instanceof-tests \
-  495-checker-checkcast-tests \
-  496-checker-inlining-class-loader \
-  497-inlining-and-class-loader \
-  498-type-propagation \
-  499-bce-phi-array-length \
-  500-instanceof \
-  501-null-constant-dce \
-  501-regression-packed-switch \
-  503-dead-instructions \
-  504-regression-baseline-entry \
-  508-checker-disassembly \
-  510-checker-try-catch \
-  513-array-deopt \
-  515-dce-dominator \
-  517-checker-builder-fallthrough \
-  518-null-array-get \
-  519-bound-load-class \
-  520-equivalent-phi \
-  521-checker-array-set-null \
-  521-regression-integer-field-set \
-  522-checker-regression-monitor-exit \
-  523-checker-can-throw-regression \
-  525-checker-arrays-fields1 \
-  525-checker-arrays-fields2 \
-  526-checker-caller-callee-regs \
-  526-long-regalloc \
-  527-checker-array-access-split \
-  528-long-hint \
-  529-checker-unresolved \
-  529-long-split \
-  530-checker-loops1 \
-  530-checker-loops2 \
-  530-checker-loops3 \
-  530-checker-lse \
-  530-checker-regression-reftyp-final \
-  530-instanceof-checkcast \
-  532-checker-nonnull-arrayset \
-  534-checker-bce-deoptimization \
-  535-deopt-and-inlining \
-  535-regression-const-val \
-  536-checker-intrinsic-optimization \
-  536-checker-needs-access-check \
-  537-checker-inline-and-unverified \
-  537-checker-jump-over-jump \
-  538-checker-embed-constants \
-  540-checker-rtp-bug \
-  541-regression-inlined-deopt \
-  542-unresolved-access-check \
-  543-checker-dce-trycatch \
-  543-env-long-ref \
-  545-tracing-and-jit \
-  546-regression-simplify-catch \
-  550-checker-multiply-accumulate \
-  550-checker-regression-wide-store \
-  551-checker-shifter-operand \
-  551-invoke-super \
   552-checker-sharpening \
-  552-checker-primitive-typeprop \
-  552-invoke-non-existent-super \
-  553-invoke-super \
-  554-checker-rtp-checkcast \
-  555-UnsafeGetLong-regression \
-  556-invoke-super \
-  558-switch \
-  559-bce-ssa \
-  559-checker-irreducible-loop \
-  559-checker-rtp-ifnotnull \
-  560-packed-switch \
-  561-divrem \
-  561-shared-slowpaths \
-  562-bce-preheader \
-  562-no-intermediate \
-  563-checker-fakestring \
-  564-checker-irreducible-loop \
-  564-checker-negbitwise \
-  565-checker-doublenegbitwise \
-  565-checker-irreducible-loop \
-  566-polymorphic-inlining \
-  568-checker-onebit \
-  570-checker-osr \
-  570-checker-select \
-  571-irreducible-loop \
-  572-checker-array-get-regression \
-  573-checker-checkcast-regression \
-  574-irreducible-and-constant-area \
-  575-checker-isnan \
-  575-checker-string-init-alias \
-  577-checker-fp2int \
-  578-bce-visit \
-  580-checker-round \
-  580-checker-string-fact-intrinsics \
-  581-rtp \
-  582-checker-bce-length \
-  584-checker-div-bool \
-  586-checker-null-array-get \
-  587-inline-class-error \
-  588-checker-irreducib-lifetime-hole \
-  589-super-imt \
-  590-checker-arr-set-null-regression \
-  591-new-instance-string \
-  592-checker-regression-bool-input \
-  593-checker-long-2-float-regression \
-  593-checker-shift-and-simplifier \
-  594-checker-array-alias \
-  594-invoke-super \
-  594-load-string-regression \
-  595-error-class \
-  596-checker-dead-phi \
-  597-deopt-new-string \
-  599-checker-irreducible-loop \
-  600-verifier-fails \
-  601-method-access \
-  602-deoptimizeable \
-  603-checker-instanceof \
-  604-hot-static-interface \
-  605-new-string-from-bytes \
-  608-checker-unresolved-lse \
-  609-checker-inline-interface \
-  609-checker-x86-bounds-check \
-  610-arraycopy \
-  611-checker-simplify-if \
-  612-jit-dex-cache \
-  613-inlining-dex-cache \
-  614-checker-dump-constant-location \
-  615-checker-arm64-store-zero \
-  617-clinit-oome \
-  618-checker-induction \
-  621-checker-new-instance \
-  700-LoadArgRegs \
-  701-easy-div-rem \
-  702-LargeBranchOffset \
-  704-multiply-accumulate \
-  800-smali \
-  802-deoptimization \
-  960-default-smali \
-  961-default-iface-resolution-gen \
-  963-default-range-smali \
-  965-default-verify \
-  966-default-conflict \
-  967-default-ame \
-  968-default-partial-compile-gen \
-  969-iface-super \
-  971-iface-super \
-  972-default-imt-collision \
-  972-iface-super-multidex \
-  973-default-multidex \
-  974-verify-interface-super \
-  975-iface-private
+  562-checker-no-intermediate \
diff --git a/test/Android.bp b/test/Android.bp
index af70486..fe20f29 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -256,6 +256,7 @@
         "910-methods/methods.cc",
         "911-get-stack-trace/stack_trace.cc",
         "912-classes/classes.cc",
+        "913-heaps/heaps.cc",
     ],
     shared_libs: [
         "libbase",
@@ -265,10 +266,7 @@
 art_cc_test_library {
     name: "libtiagent",
     defaults: ["libtiagent-defaults"],
-    shared_libs: [
-        "libart",
-        "libopenjdkjvmti",
-    ],
+    shared_libs: ["libart"],
 }
 
 art_cc_test_library {
@@ -277,10 +275,7 @@
         "libtiagent-defaults",
         "art_debug_defaults",
     ],
-    shared_libs: [
-        "libartd",
-        "libopenjdkjvmtid",
-    ],
+    shared_libs: ["libartd"],
 }
 
 cc_defaults {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 2251b7e..96b984d 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -279,6 +279,7 @@
   910-methods \
   911-get-stack-trace \
   912-classes \
+  913-heaps \
 
 ifneq (,$(filter target,$(TARGET_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -374,15 +375,16 @@
 # * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
 #   hope the second process got into the expected state. The slowness of gcstress makes this bad.
 # * 908-gc-start-finish expects GCs only to be run at clear points. The reduced heap size makes
-#   this non-deterministic.
+#   this non-deterministic. Same for 913.
 # * 961-default-iface-resolution-gen and 964-default-iface-init-genare very long tests that often
 #   will take more than the timeout to run when gcstress is enabled. This is because gcstress
 #   slows down allocations significantly which these tests do a lot.
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
   137-cfi \
   908-gc-start-finish \
+  913-heaps \
   961-default-iface-resolution-gen \
-  964-default-iface-init-gen \  
+  964-default-iface-init-gen
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -501,8 +503,10 @@
 # also uses Generic JNI instead of the JNI compiler.
 # Test 906 iterates the heap filtering with different options. No instances should be created
 # between those runs to be able to have precise checks.
+# Test 902 hits races with the JIT compiler. b/32821077
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
   137-cfi \
+  902-hello-transformation \
   904-object-allocation \
   906-iterate-heap \
 
@@ -604,13 +608,7 @@
 TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
 
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
-# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
-#      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
-#      instruction yet (b/26601270).
-TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
-  484-checker-register-hints \
-  527-checker-array-access-split
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
@@ -790,6 +788,10 @@
 TEST_ART_TARGET_SYNC_DEPS += $(OUT_DIR)/$(ART_TEST_LIST_device_$(TARGET_2ND_ARCH)_libnativebridgetest)
 endif
 
+# Also need libopenjdkjvmti.
+TEST_ART_TARGET_SYNC_DEPS += libopenjdkjvmti
+TEST_ART_TARGET_SYNC_DEPS += libopenjdkjvmtid
+
 # All tests require the host executables. The tests also depend on the core images, but on
 # specific version depending on the compiler.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
@@ -803,7 +805,9 @@
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libnativebridgetest) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmti$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmtid$(ART_HOST_SHLIB_EXTENSION) \
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
@@ -816,7 +820,10 @@
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(2ND_ART_HOST_ARCH)_libnativebridgetest) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmti$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmtid$(ART_HOST_SHLIB_EXTENSION) \
+
 endif
 
 # Create a rule to build and run a tests following the form:
diff --git a/test/562-no-intermediate/src/Main.java b/test/DexToDexDecompiler/Main.java
similarity index 67%
copy from test/562-no-intermediate/src/Main.java
copy to test/DexToDexDecompiler/Main.java
index 3b74d6f..8f5075a 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/DexToDexDecompiler/Main.java
@@ -15,13 +15,20 @@
  */
 
 public class Main {
-
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
+  Main() {
+    // Will be quickened with RETURN_VOID_NO_BARRIER.
   }
 
-  static int index = 0;
-  static double[] array = new double[2];
+  public static void main() {
+    Main m = new Main();
+    Object o = m;
+    // The call and field accesses will be quickened.
+    m.foo(m.a);
+
+    // The checkcast will be quickened.
+    m.foo(((Main)o).a);
+  }
+
+  int a;
+  void foo(int a) {}
 }
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 3cb1f23..c601e3e 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -139,8 +139,8 @@
         float f9, int i10, float f10);
 
     // Normal native
-    native static void stackArgsSignExtendedMips64(int i1, int i2, int i3, int i4, int i5, int i6,
-        int i7, int i8);
+    native static long getStackArgSignExtendedMips64(int i1, int i2, int i3, int i4, int i5, int i6,
+        int stack_arg);
 
     // Normal native
     static native double logD(double d);
@@ -273,8 +273,8 @@
         float f9, int i10, float f10);
 
     @FastNative
-    native static void stackArgsSignExtendedMips64_Fast(int i1, int i2, int i3, int i4, int i5, int i6,
-        int i7, int i8);
+    native static long getStackArgSignExtendedMips64_Fast(int i1, int i2, int i3, int i4, int i5, int i6,
+        int stack_arg);
 
     @FastNative
     static native double logD_Fast(double d);
@@ -316,10 +316,6 @@
         float f9, int i10, float f10);
 
     @CriticalNative
-    native static void stackArgsSignExtendedMips64_Critical(int i1, int i2, int i3, int i4, int i5, int i6,
-        int i7, int i8);
-
-    @CriticalNative
     static native double logD_Critical(double d);
     @CriticalNative
     static native float logF_Critical(float f);
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 4248148..9cfa324 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -128,9 +128,10 @@
     return;
   }
 
+  Thread* self = Thread::Current();
   ArtMethod* method = nullptr;
   {
-    ScopedObjectAccess soa(Thread::Current());
+    ScopedObjectAccess soa(self);
 
     ScopedUtfChars chars(env, method_name);
     CHECK(chars.c_str() != nullptr);
@@ -147,13 +148,26 @@
     } else {
       // Sleep to yield to the compiler thread.
       usleep(1000);
-      ScopedObjectAccess soa(Thread::Current());
+      ScopedObjectAccess soa(self);
       // Make sure there is a profiling info, required by the compiler.
-      ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+      ProfilingInfo::Create(self, method, /* retry_allocation */ true);
       // Will either ensure it's compiled or do the compilation itself.
-      jit->CompileMethod(method, soa.Self(), /* osr */ false);
+      jit->CompileMethod(method, self, /* osr */ false);
     }
   }
 }
 
+// Returns whether the declared virtual method `method_name` of `cls` is currently marked as
+// having a single implementation. CHECK-fails if the name is unreadable or no method matches.
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasSingleImplementation(
+    JNIEnv* env, jclass, jclass cls, jstring method_name) {
+  ScopedObjectAccess soa(Thread::Current());
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ArtMethod* method = soa.Decode<mirror::Class>(cls)->FindDeclaredVirtualMethodByName(
+      chars.c_str(), kRuntimePointerSize);
+  CHECK(method != nullptr) << "Unable to find method called " << chars.c_str();
+  return method->HasSingleImplementation();
+}
+
 }  // namespace art
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 3535f32..bb3a3ad 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -58,6 +58,7 @@
 ARGS=""
 EXTERNAL_LOG_TAGS="n" # if y respect externally set ANDROID_LOG_TAGS.
 DRY_RUN="n" # if y prepare to run the test but don't run it.
+TEST_VDEX="n"
 
 while true; do
     if [ "x$1" = "x--quiet" ]; then
@@ -148,7 +149,7 @@
         SECONDARY_DEX=":$DEX_LOCATION/$TEST_NAME-ex.jar"
         # Enable cfg-append to make sure we get the dump for both dex files.
         # (otherwise the runtime compilation of the secondary dex will overwrite
-        # the dump of the first one)
+        # the dump of the first one).
         FLAGS="${FLAGS} -Xcompiler-option --dump-cfg-append"
         COMPILE_FLAGS="${COMPILE_FLAGS} --dump-cfg-append"
         shift
@@ -243,6 +244,9 @@
     elif [ "x$1" = "x--dry-run" ]; then
         DRY_RUN="y"
         shift
+    elif [ "x$1" = "x--vdex" ]; then
+        TEST_VDEX="y"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -397,9 +401,30 @@
 fi
 
 if [ "$HOST" = "n" ]; then
-  ISA=$(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
+  # Need to be root to query /data/dalvik-cache
+  adb root > /dev/null
+  adb wait-for-device
+  ISA=
+  ISA_adb_invocation=
+  ISA_outcome=
+  # We iterate a few times to work around an adb issue. b/32655576
+  for i in {1..10}; do
+    ISA_adb_invocation=$(adb shell ls -F /data/dalvik-cache)
+    ISA_outcome=$?
+    ISA=$(echo $ISA_adb_invocation | grep -Ewo "${ARCHITECTURES_PATTERN}")
+    if [ x"$ISA" != "x" ]; then
+      break;
+    fi
+  done
   if [ x"$ISA" = "x" ]; then
     echo "Unable to determine architecture"
+    # Print a few things to help diagnose the problem.
+    echo "adb invocation output: $ISA_adb_invocation"
+    echo "adb invocation outcome: $ISA_outcome"
+    echo $(adb shell ls -F /data/dalvik-cache)
+    echo $(adb shell ls /data/dalvik-cache)
+    echo ${ARCHITECTURES_PATTERN}
+    echo $(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
     exit 1
   fi
 fi
@@ -423,6 +448,7 @@
 fi
 
 dex2oat_cmdline="true"
+vdex_cmdline="true"
 mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
 strip_cmdline="true"
 
@@ -452,6 +478,9 @@
     # Use -k 1m to SIGKILL it a minute later if it hasn't ended.
     dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}"
   fi
+  if [ "$TEST_VDEX" = "y" ]; then
+    vdex_cmdline="${dex2oat_cmdline} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
+  fi
 fi
 
 if [ "$STRIP_DEX" = "y" ]; then
@@ -492,6 +521,7 @@
 # Remove whitespace.
 dex2oat_cmdline=$(echo $dex2oat_cmdline)
 dalvikvm_cmdline=$(echo $dalvikvm_cmdline)
+vdex_cmdline=$(echo $vdex_cmdline)
 
 if [ "$HOST" = "n" ]; then
     adb root > /dev/null
@@ -532,6 +562,7 @@
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
+             $vdex_cmdline && \
              $strip_cmdline && \
              $dalvikvm_cmdline"
 
@@ -605,7 +636,7 @@
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $strip_cmdline && $cmdline"
+      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $vdex_cmdline && $strip_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
@@ -613,6 +644,7 @@
     rm -rf ${DEX_LOCATION}/dalvik-cache/
     mkdir -p ${mkdir_locations} || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
+    $vdex_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
 
     # For running, we must turn off logging when dex2oat or patchoat are missing. Otherwise we use
diff --git a/test/run-test b/test/run-test
index 7a4afaf..ea9622a 100755
--- a/test/run-test
+++ b/test/run-test
@@ -351,6 +351,9 @@
     elif [ "x$1" = "x--bisection-search" ]; then
         bisection_search="yes"
         shift
+    elif [ "x$1" = "x--vdex" ]; then
+        run_args="${run_args} --vdex"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -640,6 +643,7 @@
         echo "    --pic-test            Compile the test code position independent."
         echo "    --quiet               Don't print anything except failure messages"
         echo "    --bisection-search    Perform bisection bug search."
+        echo "    --vdex                Test using vdex as in input to dex2oat. Only works with --prebuild."
     ) 1>&2  # Direct to stderr so usage is not printed if --quiet is set.
     exit 1
 fi
@@ -758,8 +762,8 @@
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=24576
-  run_file_size_limit=24576
+  build_file_size_limit=32768
+  run_file_size_limit=32768
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
diff --git a/test/ti-agent/common_helper.h b/test/ti-agent/common_helper.h
new file mode 100644
index 0000000..84997f3
--- /dev/null
+++ b/test/ti-agent/common_helper.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_COMMON_HELPER_H_
+#define ART_TEST_TI_AGENT_COMMON_HELPER_H_
+
+#include "jni.h"
+#include "ScopedLocalRef.h"
+
+namespace art {
+
+template <typename T>
+static jobjectArray CreateObjectArray(JNIEnv* env,
+                                      jint length,
+                                      const char* component_type_descriptor,
+                                      T src) {
+  if (length < 0) {
+    return nullptr;
+  }
+
+  ScopedLocalRef<jclass> obj_class(env, env->FindClass(component_type_descriptor));
+  if (obj_class.get() == nullptr) {
+    return nullptr;
+  }
+
+  ScopedLocalRef<jobjectArray> ret(env, env->NewObjectArray(length, obj_class.get(), nullptr));
+  if (ret.get() == nullptr) {
+    return nullptr;
+  }
+
+  for (jint i = 0; i < length; ++i) {
+    jobject element = src(i);
+    env->SetObjectArrayElement(ret.get(), static_cast<jint>(i), element);
+    env->DeleteLocalRef(element);
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+  }
+
+  return ret.release();
+}
+
+static void SetAllCapabilities(jvmtiEnv* env) {
+  jvmtiCapabilities caps;
+  env->GetPotentialCapabilities(&caps);
+  env->AddCapabilities(&caps);
+}
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_COMMON_HELPER_H_
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index 4db953c..a959482 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -36,6 +36,7 @@
 #include "910-methods/methods.h"
 #include "911-get-stack-trace/stack_trace.h"
 #include "912-classes/classes.h"
+#include "913-heaps/heaps.h"
 
 namespace art {
 
@@ -64,6 +65,7 @@
   { "910-methods", Test910Methods::OnLoad, nullptr },
   { "911-get-stack-trace", Test911GetStackTrace::OnLoad, nullptr },
   { "912-classes", Test912Classes::OnLoad, nullptr },
+  { "913-heaps", Test913Heaps::OnLoad, nullptr },
 };
 
 static AgentLib* FindAgent(char* name) {
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index 27c2054..493eafb 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -48,7 +48,7 @@
 include $(CLEAR_VARS)
 LOCAL_SRC_FILES := $(call all-java-files-under, test)
 LOCAL_JAR_MANIFEST := test/manifest.txt
-LOCAL_STATIC_JAVA_LIBRARIES := ahat junit
+LOCAL_STATIC_JAVA_LIBRARIES := ahat junit-host
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_TAGS := tests
 LOCAL_MODULE := ahat-tests
diff --git a/tools/art b/tools/art
index 1394a46..91d6e27 100644
--- a/tools/art
+++ b/tools/art
@@ -30,8 +30,9 @@
 }
 
 function find_libdir() {
+  # Get the actual file, $DALVIKVM may be a symbolic link.
   # Use realpath instead of readlink because Android does not have a readlink.
-  if [ "$(realpath "$ANDROID_ROOT/bin/$DALVIKVM")" = "$(realpath "$ANDROID_ROOT/bin/dalvikvm64")" ]; then
+  if [[ "$(realpath "$ANDROID_ROOT/bin/$DALVIKVM")" == *dalvikvm64 ]]; then
     echo "lib64"
   else
     echo "lib"
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 12e0338..2d26b48 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -19,7 +19,17 @@
   exit 1
 fi
 
-out_dir=${OUT_DIR-out}
+# Logic for setting out_dir from build/make/core/envsetup.mk:
+if [[ -z $OUT_DIR ]]; then
+  if [[ -z $OUT_DIR_COMMON_BASE ]]; then
+    out_dir=out
+  else
+    out_dir=${OUT_DIR_COMMON_BASE}/${PWD##*/}
+  fi
+else
+  out_dir=${OUT_DIR}
+fi
+
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
 common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
diff --git a/tools/cpp-define-generator/constant_class.def b/tools/cpp-define-generator/constant_class.def
index 58372f9..f46cd33 100644
--- a/tools/cpp-define-generator/constant_class.def
+++ b/tools/cpp-define-generator/constant_class.def
@@ -25,6 +25,7 @@
 
 DEFINE_FLAG_OFFSET(MIRROR_CLASS, STATUS_INITIALIZED,       art::mirror::Class::kStatusInitialized)
 DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE,     art::kAccClassIsFinalizable)
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_INTERFACE,       art::kAccInterface)
 // TODO: We should really have a BitPosition which also checks it's a power of 2.
 DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE_BIT, art::MostSignificantBit(art::kAccClassIsFinalizable))
 
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index 67ed5b5..08d5885 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,6 +30,10 @@
 DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,       int32_t,  kThinLockCountOne)
 
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS, uint32_t, kStateForwardingAddress)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_OVERFLOW, uint32_t, kStateForwardingAddressOverflow)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_SHIFT, uint32_t, kForwardingAddressShift)
+
 DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED,   uint32_t,  kGCStateMaskShifted)
 DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT,   int32_t,  kGCStateShift)
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
index af5ca21..1364b55 100644
--- a/tools/cpp-define-generator/constant_thread.def
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -25,5 +25,7 @@
 
 DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST,    int32_t, art::kSuspendRequest)
 DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
+DEFINE_THREAD_CONSTANT(EMPTY_CHECKPOINT_REQUEST, int32_t, art::kEmptyCheckpointRequest)
+DEFINE_THREAD_CONSTANT(SUSPEND_OR_CHECKPOINT_REQUEST,  int32_t, art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)
 
 #undef DEFINE_THREAD_CONSTANT
diff --git a/tools/cpp-define-generator/generate-asm-support b/tools/cpp-define-generator/generate-asm-support
index f95648b..fcdf72f 100755
--- a/tools/cpp-define-generator/generate-asm-support
+++ b/tools/cpp-define-generator/generate-asm-support
@@ -5,4 +5,4 @@
 
 [[ -z ${ANDROID_BUILD_TOP+x} ]] && (echo "Run source build/envsetup.sh first" >&2 && exit 1)
 
-cpp-define-generator-datad > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
+cpp-define-generator-data > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
diff --git a/tools/cpp-define-generator/main.cc b/tools/cpp-define-generator/main.cc
index a1b463a..fc99f8a 100644
--- a/tools/cpp-define-generator/main.cc
+++ b/tools/cpp-define-generator/main.cc
@@ -59,12 +59,12 @@
 }
 
 template <typename T>
-void cpp_define(std::string name, T value) {
+void cpp_define(const std::string& name, T value) {
   std::cout << "#define " << name << " " << pretty_format(value) << std::endl;
 }
 
 template <typename T>
-void emit_check_eq(T value, std::string expr) {
+void emit_check_eq(T value, const std::string& expr) {
   std::cout << "DEFINE_CHECK_EQ(" << value << ", (" << expr << "))" << std::endl;
 }
 
diff --git a/tools/dexfuzz/README b/tools/dexfuzz/README
index a0658ec..c1cdf1e 100644
--- a/tools/dexfuzz/README
+++ b/tools/dexfuzz/README
@@ -4,7 +4,7 @@
 DexFuzz is primarily a tool for fuzzing DEX files. Fuzzing is the introduction of
 subtle changes ("mutations") to a file to produce a new test case. These test cases
 can be used to test the various modes of execution available to ART (Interpreter,
-Quick compiler, Optimizing compiler) to check for bugs in these modes of execution.
+Optimizing compiler) to check for bugs in these modes of execution.
 This is done by differential testing - each test file is executed with each mode of
 execution, and any differences between the resulting outputs may be an indication of
 a bug in one of the modes.
@@ -53,17 +53,16 @@
 
 And also at least two of the following backends:
   --interpreter
-  --quick
   --optimizing
 
 Note that if you wanted to test both ARM and ARM64 on an ARM64 device, you can use
 --allarm. Also in this case only one backend is needed, if i.e., you wanted to test
-ARM Quick Backend vs. ARM64 Quick Backend.
+ARM Optimizing Backend vs. ARM64 Optimizing Backend.
 
 Some legal examples:
-  --arm --quick --optimizing
-  --x86 --quick --optimizing --interpreter
-  --allarm --quick
+  --arm --optimizing --interpreter
+  --x86 --optimizing --interpreter
+  --allarm --optimizing
 
 Add in --device=<device name, e.g. device:generic> if you want to specify a device.
 Add in --execute-dir=<dir on device> if you want to specify an execution directory.
@@ -98,7 +97,6 @@
              those occurrences.
 Timed Out  - mutated files that timed out for one or more backends.
              Current timeouts are:
-               Quick - 5 seconds
                Optimizing - 5 seconds
                Intepreter - 30 seconds
               (use --short-timeouts to set all backends to 2 seconds.)
diff --git a/tools/dexfuzz/src/dexfuzz/Options.java b/tools/dexfuzz/src/dexfuzz/Options.java
index b442b22..99e03e8 100644
--- a/tools/dexfuzz/src/dexfuzz/Options.java
+++ b/tools/dexfuzz/src/dexfuzz/Options.java
@@ -51,6 +51,7 @@
   public static boolean usingSpecificDevice = false;
   public static int repeat = 1;
   public static String executeDirectory = "/data/art-test";
+  public static String androidRoot = "";
   public static String dumpMutationsFile = "mutations.dump";
   public static String loadMutationsFile = "mutations.dump";
   public static String reportLogFile = "report.log";
@@ -61,7 +62,6 @@
   public static boolean executeOnHost;
   public static boolean noBootImage;
   public static boolean useInterpreter;
-  public static boolean useQuick;
   public static boolean useOptimizing;
   public static boolean useArchArm;
   public static boolean useArchArm64;
@@ -96,12 +96,13 @@
     Log.always("                           the argument given to adb -s. Default execution mode.");
     Log.always("    --execute-dir=<dir>  : Push tests to this directory to execute them.");
     Log.always("                           (Default: /data/art-test)");
+    Log.always("    --android-root=<dir> : Set path where dalvikvm should look for binaries.");
+    Log.always("                           Use this when pushing binaries to a custom location.");
     Log.always("    --no-boot-image      : Use this flag when boot.art is not available.");
     Log.always("    --skip-host-verify   : When executing, skip host-verification stage");
     Log.always("    --execute-class=<c>  : When executing, execute this class (default: Main)");
     Log.always("");
     Log.always("    --interpreter        : Include the Interpreter in comparisons");
-    Log.always("    --quick              : Include the Quick Compiler in comparisons");
     Log.always("    --optimizing         : Include the Optimizing Compiler in comparisons");
     Log.always("");
     Log.always("    --arm                : Include ARM backends in comparisons");
@@ -160,8 +161,6 @@
       skipHostVerify = true;
     } else if (flag.equals("interpreter")) {
       useInterpreter = true;
-    } else if (flag.equals("quick")) {
-      useQuick = true;
     } else if (flag.equals("optimizing")) {
       useOptimizing = true;
     } else if (flag.equals("arm")) {
@@ -261,6 +260,8 @@
       usingSpecificDevice = true;
     } else if (key.equals("execute-dir")) {
       executeDirectory = value;
+    } else if (key.equals("android-root")) {
+      androidRoot = value;
     } else {
       Log.error("Unrecognised key: --" + key);
       usage();
@@ -423,18 +424,15 @@
       if (useInterpreter) {
         backends++;
       }
-      if (useQuick) {
-        backends++;
-      }
       if (useOptimizing) {
         backends++;
       }
       if (useArchArm && useArchArm64) {
-        // Could just be comparing quick-ARM versus quick-ARM64?
+        // Could just be comparing optimizing-ARM versus optimizing-ARM64?
         backends++;
       }
       if (backends < 2) {
-        Log.error("Not enough backends specified! Try --quick --interpreter!");
+        Log.error("Not enough backends specified! Try --optimizing --interpreter!");
         return false;
       }
     }
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Arm64OptimizingBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/Arm64OptimizingBackendExecutor.java
index 72e36e8..84ed4c4 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Arm64OptimizingBackendExecutor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Arm64OptimizingBackendExecutor.java
@@ -29,6 +29,9 @@
   protected String constructCommand(String programName) {
     StringBuilder commandBuilder = new StringBuilder();
     commandBuilder.append("dalvikvm64 -Xcompiler-option --compiler-backend=Optimizing ");
+    // The -Xno-dex-file-fallback option ensures that the execution does not default to
+    // the interpreter if compilation fails.
+    commandBuilder.append("-Xno-dex-file-fallback ");
     if (device.noBootImageAvailable()) {
       commandBuilder.append("-Ximage:/data/art-test/core.art -Xnorelocate ");
     }
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Arm64QuickBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/Arm64QuickBackendExecutor.java
deleted file mode 100644
index d9228ed..0000000
--- a/tools/dexfuzz/src/dexfuzz/executors/Arm64QuickBackendExecutor.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dexfuzz.executors;
-
-import dexfuzz.listeners.BaseListener;
-
-public class Arm64QuickBackendExecutor extends Executor {
-
-  public Arm64QuickBackendExecutor(BaseListener listener, Device device) {
-    super("ARM64 Quick Backend", 5, listener, Architecture.ARM64, device,
-        /*needsCleanCodeCache*/ true, /*isBisectable*/ false);
-  }
-
-  @Override
-  protected String constructCommand(String programName) {
-    StringBuilder commandBuilder = new StringBuilder();
-    commandBuilder.append("dalvikvm64 -Xcompiler-option --compiler-backend=Quick ");
-    if (device.noBootImageAvailable()) {
-      commandBuilder.append("-Ximage:/data/art-test/core.art -Xnorelocate ");
-    }
-    commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
-    commandBuilder.append(executeClass);
-    return commandBuilder.toString();
-  }
-}
diff --git a/tools/dexfuzz/src/dexfuzz/executors/ArmOptimizingBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/ArmOptimizingBackendExecutor.java
index ded8cf9..26a5eea 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/ArmOptimizingBackendExecutor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/ArmOptimizingBackendExecutor.java
@@ -29,6 +29,9 @@
   protected String constructCommand(String programName) {
     StringBuilder commandBuilder = new StringBuilder();
     commandBuilder.append("dalvikvm32 -Xcompiler-option --compiler-backend=Optimizing ");
+    // The -Xno-dex-file-fallback option ensures that the execution does not default to
+    // the interpreter if compilation fails.
+    commandBuilder.append("-Xno-dex-file-fallback ");
     if (device.noBootImageAvailable()) {
       commandBuilder.append("-Ximage:/data/art-test/core.art -Xnorelocate ");
     }
diff --git a/tools/dexfuzz/src/dexfuzz/executors/ArmQuickBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/ArmQuickBackendExecutor.java
deleted file mode 100644
index 0eb35f7..0000000
--- a/tools/dexfuzz/src/dexfuzz/executors/ArmQuickBackendExecutor.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dexfuzz.executors;
-
-import dexfuzz.listeners.BaseListener;
-
-public class ArmQuickBackendExecutor extends Executor {
-
-  public ArmQuickBackendExecutor(BaseListener listener, Device device) {
-    super("ARM Quick Backend", 5, listener, Architecture.ARM, device,
-        /*needsCleanCodeCache*/ true, /*isBisectable*/ false);
-  }
-
-  @Override
-  protected String constructCommand(String programName) {
-    StringBuilder commandBuilder = new StringBuilder();
-    commandBuilder.append("dalvikvm32 -Xcompiler-option --compiler-backend=Quick ");
-    if (device.noBootImageAvailable()) {
-      commandBuilder.append("-Ximage:/data/art-test/core.art -Xnorelocate ");
-    }
-    commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
-    commandBuilder.append(executeClass);
-    return commandBuilder.toString();
-  }
-}
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Executor.java b/tools/dexfuzz/src/dexfuzz/executors/Executor.java
index c62a3ad..2bcf3a1 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Executor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Executor.java
@@ -177,7 +177,15 @@
    * Executes runtime.
    */
   public void execute(String programName) {
-    executionResult = executeCommandWithTimeout(constructCommand(programName), true);
+    String command = "";
+    String androidRoot = Options.androidRoot.trim();
+    if (androidRoot.length() != 0) {
+      command = "PATH=" + androidRoot + "/bin ";
+      command += "ANDROID_ROOT=" + androidRoot + " ";
+      command += "LD_LIBRARY_PATH="+ androidRoot + "/lib:" + androidRoot + "/lib64 ";
+    }
+    command += constructCommand(programName);
+    executionResult = executeCommandWithTimeout(command, true);
   }
 
   /**
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Mips64OptimizingBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/Mips64OptimizingBackendExecutor.java
index 72d43e7..883ff2a 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Mips64OptimizingBackendExecutor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Mips64OptimizingBackendExecutor.java
@@ -29,6 +29,9 @@
   protected String constructCommand(String programName) {
     StringBuilder commandBuilder = new StringBuilder();
     commandBuilder.append("dalvikvm64 -Xcompiler-option --compiler-backend=Optimizing ");
+    // The -Xno-dex-file-fallback option ensures that the execution does not default to
+    // interpreter if compilation fails.
+    commandBuilder.append("-Xno-dex-file-fallback ");
     commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
     commandBuilder.append(executeClass);
     return commandBuilder.toString();
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Mips64QuickBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/Mips64QuickBackendExecutor.java
deleted file mode 100644
index e7e5ff6..0000000
--- a/tools/dexfuzz/src/dexfuzz/executors/Mips64QuickBackendExecutor.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dexfuzz.executors;
-
-import dexfuzz.listeners.BaseListener;
-
-public class Mips64QuickBackendExecutor extends Executor {
-
-  public Mips64QuickBackendExecutor(BaseListener listener, Device device) {
-    super("MIPS64 Quick Backend", 5, listener, Architecture.MIPS64, device,
-        /*needsCleanCodeCache*/ true, /*isBisectable*/ false);
-  }
-
-  @Override
-  protected String constructCommand(String programName) {
-    StringBuilder commandBuilder = new StringBuilder();
-    commandBuilder.append("dalvikvm64 -Xcompiler-option --compiler-backend=Quick ");
-    commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
-    commandBuilder.append(executeClass);
-    return commandBuilder.toString();
-  }
-}
diff --git a/tools/dexfuzz/src/dexfuzz/executors/MipsOptimizingBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/MipsOptimizingBackendExecutor.java
index 63f6858..b7babdc 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/MipsOptimizingBackendExecutor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/MipsOptimizingBackendExecutor.java
@@ -29,6 +29,9 @@
   protected String constructCommand(String programName) {
     StringBuilder commandBuilder = new StringBuilder();
     commandBuilder.append("dalvikvm32 -Xcompiler-option --compiler-backend=Optimizing ");
+    // The -Xno-dex-file-fallback option ensures that the execution does not default to
+    // interpreter if compilation fails.
+    commandBuilder.append("-Xno-dex-file-fallback ");
     commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
     commandBuilder.append(executeClass);
     return commandBuilder.toString();
diff --git a/tools/dexfuzz/src/dexfuzz/executors/MipsQuickBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/MipsQuickBackendExecutor.java
deleted file mode 100644
index b262090..0000000
--- a/tools/dexfuzz/src/dexfuzz/executors/MipsQuickBackendExecutor.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dexfuzz.executors;
-
-import dexfuzz.listeners.BaseListener;
-
-public class MipsQuickBackendExecutor extends Executor {
-
-  public MipsQuickBackendExecutor(BaseListener listener, Device device) {
-    super("MIPS Quick Backend", 5, listener, Architecture.MIPS, device,
-        /*needsCleanCodeCache*/ true, /*isBisectable*/ false);
-  }
-
-  @Override
-  protected String constructCommand(String programName) {
-    StringBuilder commandBuilder = new StringBuilder();
-    commandBuilder.append("dalvikvm32 -Xcompiler-option --compiler-backend=Quick ");
-    commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
-    commandBuilder.append(executeClass);
-    return commandBuilder.toString();
-  }
-}
diff --git a/tools/dexfuzz/src/dexfuzz/executors/X86OptimizingBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/X86OptimizingBackendExecutor.java
index 5908a8b..1d62051 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/X86OptimizingBackendExecutor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/X86OptimizingBackendExecutor.java
@@ -30,6 +30,9 @@
   protected String constructCommand(String programName) {
     StringBuilder commandBuilder = new StringBuilder();
     commandBuilder.append("dalvikvm32 -Xcompiler-option --compiler-backend=Optimizing ");
+    // The -Xno-dex-file-fallback option ensures that the execution does not default to
+    // interpreter if compilation fails.
+    commandBuilder.append("-Xno-dex-file-fallback ");
     if (Options.executeOnHost) {
       commandBuilder.append(device.getHostExecutionFlags()).append(" ");
     }
diff --git a/tools/dexfuzz/src/dexfuzz/executors/X86QuickBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/X86QuickBackendExecutor.java
deleted file mode 100644
index 9e8039d..0000000
--- a/tools/dexfuzz/src/dexfuzz/executors/X86QuickBackendExecutor.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dexfuzz.executors;
-
-import dexfuzz.Options;
-import dexfuzz.listeners.BaseListener;
-
-public class X86QuickBackendExecutor extends Executor {
-
-  public X86QuickBackendExecutor(BaseListener listener, Device device) {
-    super("x86 Quick Backend", 5, listener, Architecture.X86, device,
-        /*needsCleanCodeCache*/ true, /*isBisectable*/ false);
-  }
-
-  @Override
-  protected String constructCommand(String programName) {
-    StringBuilder commandBuilder = new StringBuilder();
-    commandBuilder.append("dalvikvm32 -Xcompiler-option --compiler-backend=Quick ");
-    if (Options.executeOnHost) {
-      commandBuilder.append(device.getHostExecutionFlags()).append(" ");
-    }
-    commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
-    commandBuilder.append(executeClass);
-    return commandBuilder.toString();
-  }
-}
diff --git a/tools/dexfuzz/src/dexfuzz/executors/X86_64OptimizingBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/X86_64OptimizingBackendExecutor.java
index 28ff1a5..ad44259 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/X86_64OptimizingBackendExecutor.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/X86_64OptimizingBackendExecutor.java
@@ -29,6 +29,9 @@
   protected String constructCommand(String programName) {
     StringBuilder commandBuilder = new StringBuilder();
     commandBuilder.append("dalvikvm64 -Xcompiler-option --compiler-backend=Optimizing ");
+    // The -Xno-dex-file-fallback option ensures that the execution does not default to
+    // interpreter if compilation fails.
+    commandBuilder.append("-Xno-dex-file-fallback ");
     commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
     commandBuilder.append(executeClass);
     return commandBuilder.toString();
diff --git a/tools/dexfuzz/src/dexfuzz/executors/X86_64QuickBackendExecutor.java b/tools/dexfuzz/src/dexfuzz/executors/X86_64QuickBackendExecutor.java
deleted file mode 100644
index 22cafe2..0000000
--- a/tools/dexfuzz/src/dexfuzz/executors/X86_64QuickBackendExecutor.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dexfuzz.executors;
-
-import dexfuzz.listeners.BaseListener;
-
-public class X86_64QuickBackendExecutor extends Executor {
-
-  public X86_64QuickBackendExecutor(BaseListener listener, Device device) {
-    super("x86_64 Quick Backend", 5, listener, Architecture.X86_64, device,
-        /*needsCleanCodeCache*/ true, /*isBisectable*/ false);
-  }
-
-  @Override
-  protected String constructCommand(String programName) {
-    StringBuilder commandBuilder = new StringBuilder();
-    commandBuilder.append("dalvikvm64 -Xcompiler-option --compiler-backend=Quick ");
-    commandBuilder.append("-cp ").append(testLocation).append("/").append(programName).append(" ");
-    commandBuilder.append(executeClass);
-    return commandBuilder.toString();
-  }
-}
diff --git a/tools/dexfuzz/src/dexfuzz/fuzzers/Fuzzer.java b/tools/dexfuzz/src/dexfuzz/fuzzers/Fuzzer.java
index bc39d79..1797d90 100644
--- a/tools/dexfuzz/src/dexfuzz/fuzzers/Fuzzer.java
+++ b/tools/dexfuzz/src/dexfuzz/fuzzers/Fuzzer.java
@@ -22,24 +22,18 @@
 import dexfuzz.executors.Architecture;
 import dexfuzz.executors.Arm64InterpreterExecutor;
 import dexfuzz.executors.Arm64OptimizingBackendExecutor;
-import dexfuzz.executors.Arm64QuickBackendExecutor;
 import dexfuzz.executors.ArmInterpreterExecutor;
 import dexfuzz.executors.ArmOptimizingBackendExecutor;
-import dexfuzz.executors.ArmQuickBackendExecutor;
 import dexfuzz.executors.Device;
 import dexfuzz.executors.Executor;
 import dexfuzz.executors.Mips64InterpreterExecutor;
 import dexfuzz.executors.Mips64OptimizingBackendExecutor;
-import dexfuzz.executors.Mips64QuickBackendExecutor;
 import dexfuzz.executors.MipsInterpreterExecutor;
 import dexfuzz.executors.MipsOptimizingBackendExecutor;
-import dexfuzz.executors.MipsQuickBackendExecutor;
 import dexfuzz.executors.X86InterpreterExecutor;
 import dexfuzz.executors.X86OptimizingBackendExecutor;
-import dexfuzz.executors.X86QuickBackendExecutor;
 import dexfuzz.executors.X86_64InterpreterExecutor;
 import dexfuzz.executors.X86_64OptimizingBackendExecutor;
-import dexfuzz.executors.X86_64QuickBackendExecutor;
 import dexfuzz.listeners.BaseListener;
 import dexfuzz.program.Mutation;
 import dexfuzz.program.Program;
@@ -121,18 +115,13 @@
     }
   }
 
-  private void addExecutorsForArchitecture(Device device, Class<? extends Executor> quick,
-      Class<? extends Executor> optimizing, Class<? extends Executor> interpreter) {
-    // NB: Currently QuickBackend MUST come immediately before same arch's Interpreter.
+  private void addExecutorsForArchitecture(Device device, Class<? extends Executor> optimizing,
+      Class<? extends Executor> interpreter) {
+    // NB: Currently OptimizingBackend MUST come immediately before same arch's Interpreter.
     // This is because intepreter execution relies on there being an OAT file already
     // created to produce correct debug information. Otherwise we will see
     // false-positive divergences.
     try {
-      if (Options.useQuick) {
-        Constructor<? extends Executor> constructor =
-            quick.getConstructor(BaseListener.class, Device.class);
-        executors.add(constructor.newInstance(listener, device));
-      }
       if (Options.useOptimizing) {
         Constructor<? extends Executor> constructor =
             optimizing.getConstructor(BaseListener.class, Device.class);
@@ -165,33 +154,33 @@
     }
 
     if (Options.useArchArm64) {
-      addExecutorsForArchitecture(device, Arm64QuickBackendExecutor.class,
-          Arm64OptimizingBackendExecutor.class, Arm64InterpreterExecutor.class);
+      addExecutorsForArchitecture(device, Arm64OptimizingBackendExecutor.class,
+          Arm64InterpreterExecutor.class);
     }
 
     if (Options.useArchArm) {
-      addExecutorsForArchitecture(device, ArmQuickBackendExecutor.class,
-          ArmOptimizingBackendExecutor.class, ArmInterpreterExecutor.class);
+      addExecutorsForArchitecture(device, ArmOptimizingBackendExecutor.class,
+          ArmInterpreterExecutor.class);
     }
 
     if (Options.useArchX86_64) {
-      addExecutorsForArchitecture(device, X86_64QuickBackendExecutor.class,
-          X86_64OptimizingBackendExecutor.class, X86_64InterpreterExecutor.class);
+      addExecutorsForArchitecture(device, X86_64OptimizingBackendExecutor.class,
+          X86_64InterpreterExecutor.class);
     }
 
     if (Options.useArchX86) {
-      addExecutorsForArchitecture(device, X86QuickBackendExecutor.class,
-          X86OptimizingBackendExecutor.class, X86InterpreterExecutor.class);
+      addExecutorsForArchitecture(device, X86OptimizingBackendExecutor.class,
+          X86InterpreterExecutor.class);
     }
 
     if (Options.useArchMips64) {
-      addExecutorsForArchitecture(device, Mips64QuickBackendExecutor.class,
-          Mips64OptimizingBackendExecutor.class, Mips64InterpreterExecutor.class);
+      addExecutorsForArchitecture(device, Mips64OptimizingBackendExecutor.class,
+          Mips64InterpreterExecutor.class);
     }
 
     if (Options.useArchMips) {
-      addExecutorsForArchitecture(device, MipsQuickBackendExecutor.class,
-          MipsOptimizingBackendExecutor.class, MipsInterpreterExecutor.class);
+      addExecutorsForArchitecture(device, MipsOptimizingBackendExecutor.class,
+          MipsInterpreterExecutor.class);
     }
 
     // Add the first backend as the golden executor for self-divergence tests.