Compiler: replace DOM traversal computation

Originally the old trace JIT used a few recursive graph-walking
algorithms - which was perfectly reasonable given that the graph
size was capped at a few dozen nodes at most.  These were replaced
with iterative walk order computations - or at least I thought
they all were.  I missed one of them, which caused a stack overflow
when compiling a pathologically large method.  This change replaces
that remaining one, the DOM traversal computation.

Also renamed some arena_allocator items for consistency and clarity,
added more detailed memory usage logging, and reworked the allocator
to waste less space when an allocation doesn't fit and a new block
must be allocated.

Change-Id: I4d84dded3c47819eefa0de90ebb821dd12eb8be8
diff --git a/src/compiler/dex/quick/gen_invoke.cc b/src/compiler/dex/quick/gen_invoke.cc
index efacff0..9fd4a86 100644
--- a/src/compiler/dex/quick/gen_invoke.cc
+++ b/src/compiler/dex/quick/gen_invoke.cc
@@ -1158,11 +1158,13 @@
   }
   RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-  RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
+  RegLocation rl_value;
   if (is_long) {
+    rl_value = LoadValueWide(rl_src_value, kCoreReg);
     OpRegReg(kOpAdd, rl_object.low_reg, rl_offset.low_reg);
     StoreBaseDispWide(rl_object.low_reg, 0, rl_value.low_reg, rl_value.high_reg);
   } else {
+    rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
   }
   if (is_volatile) {
diff --git a/src/compiler/dex/quick/ralloc_util.cc b/src/compiler/dex/quick/ralloc_util.cc
index dd38914..30ed1b7 100644
--- a/src/compiler/dex/quick/ralloc_util.cc
+++ b/src/compiler/dex/quick/ralloc_util.cc
@@ -37,6 +37,10 @@
     if (reg_pool_->FPRegs[i].is_temp)
       reg_pool_->FPRegs[i].in_use = false;
   }
+  // Reset temp tracking sanity check.
+  if (kIsDebugBuild) {
+    live_sreg_ = INVALID_SREG;
+  }
 }
 
  /*