Compile-time tuning: assembly phase

Not as much compile-time gain from reworking the assembly phase as I'd
hoped, but still worthwhile.  Should see ~2% improvement thanks to
the assembly rework.  On the other hand, expect some huge gains for some
application thanks to better detection of large machine-generated init
methods.  Thinkfree shows a 25% improvement.

The major assembly change was to establish thread the LIR nodes that
require fixup into a fixup chain.  Only those are processed during the
final assembly pass(es).  This doesn't help for methods which only
require a single pass to assemble, but does speed up the larger methods
which required multiple assembly passes.

Also replaced the block_map_ basic block lookup table (which contained
space for a BasicBlock* for each dex instruction unit) with a block id
map - cutting its space requirements by half in a 32-bit pointer
environment.

Changes:
  o Reduce size of LIR struct by 12.5% (one of the big memory users)
  o Repurpose the use/def portion of the LIR after optimization complete.
  o Encode instruction bits to LIR
  o Thread LIR nodes requiring pc fixup
  o Change follow-on assembly passes to only consider fixup LIRs
  o Switch on pc-rel fixup kind
  o Fast-path for small methods - single pass assembly
  o Avoid using cb[n]z for null checks (almost always exceed displacement)
  o Improve detection of large initialization methods.
  o Rework def/use flag setup.
  o Remove a sequential search from FindBlock using lookup table of 16-bit
    block ids rather than full block pointers.
  o Eliminate pcRelFixup and use fixup kind instead.
  o Add check for 16-bit overflow on dex offset.

Change-Id: I4c6615f83fed46f84629ad6cfe4237205a9562b4
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index c234298..fb306de 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -96,10 +96,9 @@
       try_block_addr_(NULL),
       entry_block_(NULL),
       exit_block_(NULL),
-      cur_block_(NULL),
       num_blocks_(0),
       current_code_item_(NULL),
-      block_map_(arena, 0, kGrowableArrayMisc),
+      dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc),
       current_method_(kInvalidEntry),
       current_offset_(kInvalidEntry),
       def_count_(0),
@@ -109,7 +108,9 @@
       attributes_(METHOD_IS_LEAF),  // Start with leaf assumption, change on encountering invoke.
       checkstats_(NULL),
       special_case_(kNoHandler),
-      arena_(arena) {
+      arena_(arena),
+      backward_branches_(0),
+      forward_branches_(0) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
 }
 
@@ -151,7 +152,7 @@
   orig_block->terminated_by_return = false;
 
   /* Add it to the quick lookup cache */
-  block_map_.Put(bottom_block->start_offset, bottom_block);
+  dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
 
   /* Handle the taken path */
   bottom_block->taken = orig_block->taken;
@@ -196,6 +197,23 @@
     DCHECK_EQ(*immed_pred_block_p, orig_block);
     *immed_pred_block_p = bottom_block;
   }
+
+  // Associate dex instructions in the bottom block with the new container.
+  MIR* p = bottom_block->first_mir_insn;
+  while (p != NULL) {
+    int opcode = p->dalvikInsn.opcode;
+    /*
+     * Some messiness here to ensure that we only enter real opcodes and only the
+     * first half of a potentially throwing instruction that has been split into
+     * CHECK and work portions.  The 2nd half of a split operation will have a non-null
+     * throw_insn pointer that refers to the 1st half.
+     */
+    if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+      dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+    }
+    p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
+  }
+
   return bottom_block;
 }
 
@@ -209,39 +227,37 @@
  */
 BasicBlock* MIRGraph::FindBlock(unsigned int code_offset, bool split, bool create,
                                 BasicBlock** immed_pred_block_p) {
-  BasicBlock* bb;
-  unsigned int i;
-
   if (code_offset >= cu_->code_item->insns_size_in_code_units_) {
     return NULL;
   }
-  bb = block_map_.Get(code_offset);
-  if ((bb != NULL) || !create) {
+
+  int block_id = dex_pc_to_block_map_.Get(code_offset);
+  BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id);
+
+  if ((bb != NULL) && (bb->start_offset == code_offset)) {
+    // Does this containing block start with the desired instruction?
     return bb;
   }
 
-  if (split) {
-    for (i = block_list_.Size(); i > 0; i--) {
-      bb = block_list_.Get(i - 1);
-      if (bb->block_type != kDalvikByteCode) continue;
-      /* Check if a branch jumps into the middle of an existing block */
-      if ((code_offset > bb->start_offset) && (bb->last_mir_insn != NULL) &&
-          (code_offset <= bb->last_mir_insn->offset)) {
-        BasicBlock *new_bb = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ?
-                                       immed_pred_block_p : NULL);
-        return new_bb;
-      }
-    }
+  // No direct hit.
+  if (!create) {
+    return NULL;
   }
 
-  /* Create a new one */
+  if (bb != NULL) {
+    // The target exists somewhere in an existing block.
+    return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ?  immed_pred_block_p : NULL);
+  }
+
+  // Create a new block.
   bb = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(bb);
   bb->start_offset = code_offset;
-  block_map_.Put(bb->start_offset, bb);
+  dex_pc_to_block_map_.Put(bb->start_offset, bb->id);
   return bb;
 }
 
+
 /* Identify code range in try blocks and set up the empty catch blocks */
 void MIRGraph::ProcessTryCatchBlocks() {
   int tries_size = current_code_item_->tries_size_;
@@ -307,6 +323,7 @@
     default:
       LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
   }
+  CountBranch(target);
   BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block);
   cur_block->taken = taken_block;
@@ -485,6 +502,9 @@
    * pseudo exception edge MIR.  Note also that this new block is
    * not automatically terminated after the work portion, and may
    * contain following instructions.
+   *
+   * Note also that the dex_pc_to_block_map_ entry for the potentially
+   * throwing instruction will refer to the original basic block.
    */
   BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(new_block);
@@ -518,8 +538,9 @@
       current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
 
   // TODO: need to rework expansion of block list & try_block_addr when inlining activated.
+  // TUNING: use better estimate of basic blocks for following resize.
   block_list_.Resize(block_list_.Size() + current_code_item_->insns_size_in_code_units_);
-  block_map_.SetSize(block_map_.Size() + current_code_item_->insns_size_in_code_units_);
+  dex_pc_to_block_map_.SetSize(dex_pc_to_block_map_.Size() + current_code_item_->insns_size_in_code_units_);
 
   // TODO: replace with explicit resize routine.  Using automatic extension side effect for now.
   try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
@@ -560,10 +581,7 @@
   DCHECK_EQ(current_offset_, 0);
   cur_block->start_offset = current_offset_;
   block_list_.Insert(cur_block);
-  /* Add first block to the fast lookup cache */
-// FIXME: block map needs association with offset/method pair rather than just offset
-  block_map_.Put(cur_block->start_offset, cur_block);
-// FIXME: this needs to insert at the insert point rather than entry block.
+  // FIXME: this needs to insert at the insert point rather than entry block.
   entry_block_->fall_through = cur_block;
   cur_block->predecessors->Insert(entry_block_);
 
@@ -589,7 +607,6 @@
       opcode_count_[static_cast<int>(opcode)]++;
     }
 
-
     /* Possible simple method? */
     if (live_pattern) {
       live_pattern = false;
@@ -640,6 +657,9 @@
       AppendMIR(cur_block, insn);
     }
 
+    // Associate the starting dex_pc for this opcode with its containing basic block.
+    dex_pc_to_block_map_.Put(insn->offset, cur_block->id);
+
     code_ptr += width;
 
     if (flags & Instruction::kBranch) {