MIPS32: Improve method invocation

Improvements include:
- CodeGeneratorMIPS::GenerateStaticOrDirectCall() supports:
  - MethodLoadKind::kDirectAddressWithFixup (via literals)
  - CodePtrLocation::kCallDirectWithFixup (via literals)
  - MethodLoadKind::kDexCachePcRelative
- 32-bit literals to support the above (not ready for general-
  purpose applications yet because RA is not saved in leaf
  methods, but is clobbered on MIPS32R2 when simulating
  PC-relative addressing (MIPS32R6 is OK because it has
  PC-relative addressing with the lwpc instruction))
- shorter instruction sequences for recursive static/direct
  calls

Tested:
- test-art-host-gtest
- test-art-target-gtest and test-art-target-run-test-optimizing on:
  - MIPS32R2 QEMU
  - CI20 board
  - MIPS32R6 (2nd arch) QEMU

Change-Id: Id5b137ad32d5590487fd154c9a01d3b3e7e044ff
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 1f51311..769263e 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -330,8 +330,10 @@
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 56u << kOpcodeShift, "sc", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
+  { kJTypeMask, 58u << kOpcodeShift, "balc", "P" },
   { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
   { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" },
+  { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
   { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
   { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
@@ -509,7 +511,15 @@
               }
             }
             break;
-          case 'P':  // 26-bit offset in bc.
+          case 'o':  // 19-bit offset in lwpc.
+            {
+              int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
+              offset <<= 2;
+              args << FormatInstructionPointer(instr_ptr + offset);
+              args << StringPrintf("  ; %+d", offset);
+            }
+            break;
+          case 'P':  // 26-bit offset in bc and balc.
             {
               int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1);
               offset <<= 2;
@@ -540,6 +550,7 @@
     }
   }
 
+  // TODO: Simplify this once these sequences are simplified in the compiler.
   // Special cases for sequences of:
   //   pc-relative +/- 2GB branch:
   //     auipc  reg, imm