Compile-time tuning: assembly phase
Not as much compile-time gain from reworking the assembly phase as I'd
hoped, but still worthwhile. Should see ~2% improvement thanks to
the assembly rework. On the other hand, expect some huge gains for some
application thanks to better detection of large machine-generated init
methods. Thinkfree shows a 25% improvement.
The major assembly change was to establish thread the LIR nodes that
require fixup into a fixup chain. Only those are processed during the
final assembly pass(es). This doesn't help for methods which only
require a single pass to assemble, but does speed up the larger methods
which required multiple assembly passes.
Also replaced the block_map_ basic block lookup table (which contained
space for a BasicBlock* for each dex instruction unit) with a block id
map - cutting its space requirements by half in a 32-bit pointer
environment.
Changes:
o Reduce size of LIR struct by 12.5% (one of the big memory users)
o Repurpose the use/def portion of the LIR after optimization complete.
o Encode instruction bits to LIR
o Thread LIR nodes requiring pc fixup
o Change follow-on assembly passes to only consider fixup LIRs
o Switch on pc-rel fixup kind
o Fast-path for small methods - single pass assembly
o Avoid using cb[n]z for null checks (almost always exceed displacement)
o Improve detection of large initialization methods.
o Rework def/use flag setup.
o Remove a sequential search from FindBlock using lookup table of 16-bit
block ids rather than full block pointers.
o Eliminate pcRelFixup and use fixup kind instead.
o Add check for 16-bit overflow on dex offset.
Change-Id: I4c6615f83fed46f84629ad6cfe4237205a9562b4
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index c234298..fb306de 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -96,10 +96,9 @@
try_block_addr_(NULL),
entry_block_(NULL),
exit_block_(NULL),
- cur_block_(NULL),
num_blocks_(0),
current_code_item_(NULL),
- block_map_(arena, 0, kGrowableArrayMisc),
+ dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc),
current_method_(kInvalidEntry),
current_offset_(kInvalidEntry),
def_count_(0),
@@ -109,7 +108,9 @@
attributes_(METHOD_IS_LEAF), // Start with leaf assumption, change on encountering invoke.
checkstats_(NULL),
special_case_(kNoHandler),
- arena_(arena) {
+ arena_(arena),
+ backward_branches_(0),
+ forward_branches_(0) {
try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
}
@@ -151,7 +152,7 @@
orig_block->terminated_by_return = false;
/* Add it to the quick lookup cache */
- block_map_.Put(bottom_block->start_offset, bottom_block);
+ dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
/* Handle the taken path */
bottom_block->taken = orig_block->taken;
@@ -196,6 +197,23 @@
DCHECK_EQ(*immed_pred_block_p, orig_block);
*immed_pred_block_p = bottom_block;
}
+
+ // Associate dex instructions in the bottom block with the new container.
+ MIR* p = bottom_block->first_mir_insn;
+ while (p != NULL) {
+ int opcode = p->dalvikInsn.opcode;
+ /*
+ * Some messiness here to ensure that we only enter real opcodes and only the
+ * first half of a potentially throwing instruction that has been split into
+ * CHECK and work portions. The 2nd half of a split operation will have a non-null
+ * throw_insn pointer that refers to the 1st half.
+ */
+ if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+ dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+ }
+ p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
+ }
+
return bottom_block;
}
@@ -209,39 +227,37 @@
*/
BasicBlock* MIRGraph::FindBlock(unsigned int code_offset, bool split, bool create,
BasicBlock** immed_pred_block_p) {
- BasicBlock* bb;
- unsigned int i;
-
if (code_offset >= cu_->code_item->insns_size_in_code_units_) {
return NULL;
}
- bb = block_map_.Get(code_offset);
- if ((bb != NULL) || !create) {
+
+ int block_id = dex_pc_to_block_map_.Get(code_offset);
+ BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id);
+
+ if ((bb != NULL) && (bb->start_offset == code_offset)) {
+ // Does this containing block start with the desired instruction?
return bb;
}
- if (split) {
- for (i = block_list_.Size(); i > 0; i--) {
- bb = block_list_.Get(i - 1);
- if (bb->block_type != kDalvikByteCode) continue;
- /* Check if a branch jumps into the middle of an existing block */
- if ((code_offset > bb->start_offset) && (bb->last_mir_insn != NULL) &&
- (code_offset <= bb->last_mir_insn->offset)) {
- BasicBlock *new_bb = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ?
- immed_pred_block_p : NULL);
- return new_bb;
- }
- }
+ // No direct hit.
+ if (!create) {
+ return NULL;
}
- /* Create a new one */
+ if (bb != NULL) {
+ // The target exists somewhere in an existing block.
+ return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : NULL);
+ }
+
+ // Create a new block.
bb = NewMemBB(kDalvikByteCode, num_blocks_++);
block_list_.Insert(bb);
bb->start_offset = code_offset;
- block_map_.Put(bb->start_offset, bb);
+ dex_pc_to_block_map_.Put(bb->start_offset, bb->id);
return bb;
}
+
/* Identify code range in try blocks and set up the empty catch blocks */
void MIRGraph::ProcessTryCatchBlocks() {
int tries_size = current_code_item_->tries_size_;
@@ -307,6 +323,7 @@
default:
LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
}
+ CountBranch(target);
BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true,
/* immed_pred_block_p */ &cur_block);
cur_block->taken = taken_block;
@@ -485,6 +502,9 @@
* pseudo exception edge MIR. Note also that this new block is
* not automatically terminated after the work portion, and may
* contain following instructions.
+ *
+ * Note also that the dex_pc_to_block_map_ entry for the potentially
+ * throwing instruction will refer to the original basic block.
*/
BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++);
block_list_.Insert(new_block);
@@ -518,8 +538,9 @@
current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
// TODO: need to rework expansion of block list & try_block_addr when inlining activated.
+ // TUNING: use better estimate of basic blocks for following resize.
block_list_.Resize(block_list_.Size() + current_code_item_->insns_size_in_code_units_);
- block_map_.SetSize(block_map_.Size() + current_code_item_->insns_size_in_code_units_);
+ dex_pc_to_block_map_.SetSize(dex_pc_to_block_map_.Size() + current_code_item_->insns_size_in_code_units_);
// TODO: replace with explicit resize routine. Using automatic extension side effect for now.
try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
@@ -560,10 +581,7 @@
DCHECK_EQ(current_offset_, 0);
cur_block->start_offset = current_offset_;
block_list_.Insert(cur_block);
- /* Add first block to the fast lookup cache */
-// FIXME: block map needs association with offset/method pair rather than just offset
- block_map_.Put(cur_block->start_offset, cur_block);
-// FIXME: this needs to insert at the insert point rather than entry block.
+ // FIXME: this needs to insert at the insert point rather than entry block.
entry_block_->fall_through = cur_block;
cur_block->predecessors->Insert(entry_block_);
@@ -589,7 +607,6 @@
opcode_count_[static_cast<int>(opcode)]++;
}
-
/* Possible simple method? */
if (live_pattern) {
live_pattern = false;
@@ -640,6 +657,9 @@
AppendMIR(cur_block, insn);
}
+ // Associate the starting dex_pc for this opcode with its containing basic block.
+ dex_pc_to_block_map_.Put(insn->offset, cur_block->id);
+
code_ptr += width;
if (flags & Instruction::kBranch) {