Merge "Revert "Revert "Revert "Revert "[optimizing] Improve x86 shifts"""""
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 812642b..2375595 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -768,7 +768,7 @@
}
} else {
DCHECK(!current->IsHighInterval());
- int hint = current->FindFirstRegisterHint(free_until);
+ int hint = current->FindFirstRegisterHint(free_until, liveness_);
if (hint != kNoRegister) {
DCHECK(!IsBlocked(hint));
reg = hint;
@@ -1101,8 +1101,8 @@
}
LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
- HBasicBlock* block_from = liveness_.GetBlockFromPosition(from);
- HBasicBlock* block_to = liveness_.GetBlockFromPosition(to);
+ HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2);
+ HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2);
DCHECK(block_from != nullptr);
DCHECK(block_to != nullptr);
@@ -1111,6 +1111,41 @@
return Split(interval, to);
}
+ /*
+ * Non-linear control flow will force moves at every branch instruction to the new location.
+ * To avoid having all branches do these moves, we find the next non-linear position and
+ * split the interval at this position. Take the following example (block number is the linear
+ * order position):
+ *
+ * B1
+ * / \
+ * B2 B3
+ * \ /
+ * B4
+ *
+ * B2 needs to split an interval whose next use is in B4. If we were to split at the
+ * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval
+ * is now in the correct location. It makes performance even worse if the interval is spilled
+ * and both B2 and B3 need to reload it before entering B4.
+ *
+ * By splitting at B3, we give the register allocator a chance to allocate the
+ * interval to the same register as in B1, and therefore avoid doing any
+ * moves in B3.
+ */
+ if (block_from->GetDominator() != nullptr) {
+ const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks();
+ for (size_t i = 0; i < dominated.Size(); ++i) {
+ size_t position = dominated.Get(i)->GetLifetimeStart();
+ if ((position > from) && (block_to->GetLifetimeStart() > position)) {
+ // Even if we found a better block, we continue iterating in case
+ // a dominated block is closer.
+ // Note that dominated blocks are not sorted in liveness order.
+ block_to = dominated.Get(i);
+ DCHECK_NE(block_to, block_from);
+ }
+ }
+ }
+
// If `to` is in a loop, find the outermost loop header which does not contain `from`.
for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
HBasicBlock* header = it.Current()->GetHeader();
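
A minimal, self-contained sketch of the dominated-block scan added above, using plain size_t lifetime positions instead of HBasicBlock* (PickSplitPosition and dominated_starts are hypothetical names, not ART code): among the dominator's dominated blocks, it keeps the earliest lifetime start that lies strictly between `from` and the current split target, regardless of the order the blocks are visited in.

#include <cstddef>
#include <vector>

size_t PickSplitPosition(size_t from, size_t to,
                         const std::vector<size_t>& dominated_starts) {
  size_t best = to;  // fall back to splitting at `to` itself
  for (size_t position : dominated_starts) {
    if (position > from && best > position) {
      // Keep scanning: the list is not sorted, a closer block may still follow.
      best = position;
    }
  }
  return best;
}

With the diamond example from the comment (lifetime starts B1=0, B2=10, B3=20, B4=30 and from=12 inside B2), the scan settles on 20, i.e. B3, rather than the start of B4.
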
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 0bbcb30..1784168 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -322,7 +322,8 @@
return location.IsPair() ? location.low() : location.reg();
}
-int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
+int LiveInterval::FindFirstRegisterHint(size_t* free_until,
+ const SsaLivenessAnalysis& liveness) const {
DCHECK(!IsHighInterval());
if (IsTemp()) return kNoRegister;
@@ -336,6 +337,26 @@
}
}
+ if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) {
+ // If the start of this interval is at a block boundary, we look at the
+ // location of the interval in blocks preceding the block this interval
+ // starts at. If one location is a register, we return it as a hint. This
+ // will avoid a move between the two blocks.
+ HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2);
+ for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
+ size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1;
+ // We know positions above GetStart() do not have a location yet.
+ if (position < GetStart()) {
+ LiveInterval* existing = GetParent()->GetSiblingAt(position);
+ if (existing != nullptr
+ && existing->HasRegister()
+ && (free_until[existing->GetRegister()] > GetStart())) {
+ return existing->GetRegister();
+ }
+ }
+ }
+ }
+
UsePosition* use = first_use_;
size_t start = GetStart();
size_t end = GetEnd();
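
The predecessor scan added above can be reduced to the following hedged sketch (PickPredecessorHint, pred_end_positions and location_at are invented names, not ART code): at a block boundary, reuse whichever register already holds the value at the end of a predecessor, provided that register stays free past this interval's start, so no move is needed at the block edge.

#include <cstddef>
#include <functional>
#include <vector>

constexpr int kNoRegister = -1;

int PickPredecessorHint(size_t start,
                        const std::vector<size_t>& pred_end_positions,
                        const std::function<int(size_t)>& location_at,
                        const size_t* free_until) {
  for (size_t position : pred_end_positions) {
    if (position < start) {  // locations past `start` have no assignment yet
      int reg = location_at(position);
      if (reg != kNoRegister && free_until[reg] > start) {
        return reg;  // reusing this register avoids a move at the block edge
      }
    }
  }
  return kNoRegister;
}
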
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b74e655..7b98c4e 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -23,6 +23,7 @@
namespace art {
class CodeGenerator;
+class SsaLivenessAnalysis;
static constexpr int kNoRegister = -1;
@@ -690,7 +691,7 @@
// Returns the first register hint that is at least free before
// the value contained in `free_until`. If none is found, returns
// `kNoRegister`.
- int FindFirstRegisterHint(size_t* free_until) const;
+ int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const;
// If there is enough at the definition site to find a register (for example
// it uses the same input as the first input), returns the register as a hint.
@@ -1116,14 +1117,18 @@
}
HBasicBlock* GetBlockFromPosition(size_t index) const {
- HInstruction* instruction = GetInstructionFromPosition(index / 2);
+ HInstruction* instruction = GetInstructionFromPosition(index);
if (instruction == nullptr) {
// If we are at a block boundary, get the block following.
- instruction = GetInstructionFromPosition((index / 2) + 1);
+ instruction = GetInstructionFromPosition(index + 1);
}
return instruction->GetBlock();
}
+ bool IsAtBlockBoundary(size_t index) const {
+ return GetInstructionFromPosition(index) == nullptr;
+ }
+
HInstruction* GetTempUser(LiveInterval* temp) const {
// A temporary shares the same lifetime start as the instruction that requires it.
DCHECK(temp->IsTemp());
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 8490afb..b4a45c6 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -691,6 +691,8 @@
include_cfi = false;
} else if (option == "--debuggable") {
debuggable = true;
+ include_debug_symbols = true;
+ include_cfi = true;
} else if (option.starts_with("--profile-file=")) {
profile_file_ = option.substr(strlen("--profile-file=")).data();
VLOG(compiler) << "dex2oat: profile file is " << profile_file_;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b9a3d37..949c2cb 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -2092,7 +2092,7 @@
gc::space::ImageSpace& image_space_;
const ImageHeader& image_header_;
std::unique_ptr<OatDumper> oat_dumper_;
- std::unique_ptr<OatDumperOptions> oat_dumper_options_;
+ OatDumperOptions* oat_dumper_options_;
DISALLOW_COPY_AND_ASSIGN(ImageDumper);
};
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 423b952..a37aee5 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -423,7 +423,7 @@
}
ShadowFrame* old_frame = shadow_frame;
shadow_frame = shadow_frame->GetLink();
- delete old_frame;
+ ShadowFrame::DeleteDeoptimizedFrame(old_frame);
}
ret_val->SetJ(value.GetJ());
}
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index fc3826b..9bb08a2 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2109,10 +2109,12 @@
m = c->FindVirtualMethod(name, sig);
}
if (m == nullptr) {
- c->DumpClass(LOG(ERROR), mirror::Class::kDumpClassFullDetail);
- LOG(return_errors ? ERROR : FATAL) << "Failed to register native method "
+ LOG(return_errors ? ERROR : INTERNAL_FATAL) << "Failed to register native method "
<< PrettyDescriptor(c) << "." << name << sig << " in "
<< c->GetDexCache()->GetLocation()->ToModifiedUtf8();
+ // Safe to pass in LOG(FATAL) since the log object aborts in its destructor and only goes
+ // out of scope after DumpClass is done executing.
+ c->DumpClass(LOG(return_errors ? ERROR : FATAL), mirror::Class::kDumpClassFullDetail);
ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
return JNI_ERR;
} else if (!m->IsNative()) {
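
The ordering comment above relies on C++ temporary lifetime: a log object constructed in a full expression is destroyed, and therefore aborts, only after everything streamed into it (including a multi-line class dump) has been written. A standalone sketch of that pattern, with FatalLog and DumpClass as hypothetical stand-ins rather than ART's logging classes:

#include <cstdlib>
#include <iostream>
#include <sstream>

class FatalLog {
 public:
  std::ostream& stream() { return buffer_; }
  ~FatalLog() {            // runs at the end of the full expression
    std::cerr << buffer_.str();
    std::abort();          // abort only after the dump has been streamed
  }
 private:
  std::ostringstream buffer_;
};

void DumpClass(std::ostream& os) { os << "class details...\n"; }

void Fail() {
  DumpClass(FatalLog().stream());  // the temporary outlives the DumpClass call
}
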
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 2432603..a80eed6 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -202,7 +202,8 @@
h_method, m->GetAccessFlags(), true, true, true, true);
bool verifier_success = verifier.Verify();
CHECK(verifier_success) << PrettyMethod(h_method.Get());
- ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, h_method.Get(), dex_pc);
+ ShadowFrame* new_frame = ShadowFrame::CreateDeoptimizedFrame(
+ num_regs, nullptr, h_method.Get(), dex_pc);
self_->SetShadowFrameUnderConstruction(new_frame);
const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc));
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index b93fcb4..99750a1 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -133,11 +133,7 @@
T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
Locks::mutator_lock_->AssertSharedHeld(Self());
DCHECK(IsRunnable()); // Don't work with raw objects in non-runnable states.
- if (obj == nullptr) {
- return nullptr;
- }
- DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
- return Env()->AddLocalReference<T>(obj);
+ return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj);
}
template<typename T>
diff --git a/runtime/stack.h b/runtime/stack.h
index e2af5ee..3f1bff8 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -74,12 +74,18 @@
}
// Create ShadowFrame in heap for deoptimization.
- static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
- mirror::ArtMethod* method, uint32_t dex_pc) {
+ static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link,
+ mirror::ArtMethod* method, uint32_t dex_pc) {
uint8_t* memory = new uint8_t[ComputeSize(num_vregs)];
return Create(num_vregs, link, method, dex_pc, memory);
}
+ // Delete a ShadowFrame allocated on the heap for deoptimization.
+ static void DeleteDeoptimizedFrame(ShadowFrame* sf) {
+ uint8_t* memory = reinterpret_cast<uint8_t*>(sf);
+ delete[] memory;
+ }
+
// Create ShadowFrame for interpreter using provided memory.
static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
mirror::ArtMethod* method, uint32_t dex_pc, void* memory) {
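
The new Create/Delete helper pair keeps the allocation and deallocation types matched. A standalone sketch of that pairing (Frame, CreateFrame and DeleteFrame are hypothetical names, not ART code): a frame placement-constructed in a raw uint8_t[] buffer has to be released with delete[] on that same uint8_t pointer, never with a plain `delete frame`, which would mismatch the original new[].

#include <cstddef>
#include <cstdint>
#include <new>

struct Frame {
  uint32_t num_vregs;
};

Frame* CreateFrame(uint32_t num_vregs, size_t size) {
  uint8_t* memory = new uint8_t[size];    // raw backing storage, as in the diff
  return new (memory) Frame{num_vregs};   // placement-new the frame into it
}

void DeleteFrame(Frame* frame) {
  // Release the storage exactly as it was allocated: as an array of uint8_t.
  delete[] reinterpret_cast<uint8_t*>(frame);
}
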
diff --git a/runtime/trace.h b/runtime/trace.h
index 06824b8..df6d5e7 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -189,7 +189,7 @@
std::unique_ptr<File> trace_file_;
// Buffer to store trace data.
- std::unique_ptr<uint8_t> buf_;
+ std::unique_ptr<uint8_t[]> buf_;
// Flags enabling extra tracing of things such as alloc counts.
const int flags_;
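
A small illustration (not ART code) of why the array form matters for buf_: the default deleter of std::unique_ptr<T> calls `delete`, which is undefined behavior for storage obtained with new[], while std::unique_ptr<T[]> calls `delete[]` and also provides operator[].

#include <cstdint>
#include <memory>

int main() {
  // Buggy variant: a buffer from new[] would be released with plain delete.
  // std::unique_ptr<uint8_t> bad(new uint8_t[1024]);

  std::unique_ptr<uint8_t[]> buf(new uint8_t[1024]);  // released with delete[]
  buf[0] = 0;
  return 0;
}
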
diff --git a/runtime/utils.cc b/runtime/utils.cc
index e18af00..650214f 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -262,8 +262,8 @@
void NanoSleep(uint64_t ns) {
timespec tm;
- tm.tv_sec = 0;
- tm.tv_nsec = ns;
+ tm.tv_sec = ns / MsToNs(1000);
+ tm.tv_nsec = ns - static_cast<uint64_t>(tm.tv_sec) * MsToNs(1000);
nanosleep(&tm, nullptr);
}
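
A minimal standalone version of the same fix (SleepNanos is a hypothetical name, not ART's helper): POSIX requires tv_nsec to lie in [0, 999999999], so durations of one second or more must be split into whole seconds plus a sub-second remainder, otherwise nanosleep() fails with EINVAL and does not sleep at all.

#include <cstdint>
#include <ctime>

void SleepNanos(uint64_t ns) {
  constexpr uint64_t kNanosPerSecond = 1000000000ull;
  timespec tm;
  tm.tv_sec = static_cast<time_t>(ns / kNanosPerSecond);  // whole seconds
  tm.tv_nsec = static_cast<long>(ns % kNanosPerSecond);   // remainder, < 1s
  nanosleep(&tm, nullptr);
}
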
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 259fe33..195de0c 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -515,4 +515,10 @@
EXPECT_FALSE(IsAbsoluteUint<32>(UINT_MAX_plus1));
}
+TEST_F(UtilsTest, TestSleep) {
+ auto start = NanoTime();
+ NanoSleep(MsToNs(1500));
+ EXPECT_GT(NanoTime() - start, MsToNs(1000));
+}
+
} // namespace art
diff --git a/test/484-checker-register-hints/expected.txt b/test/484-checker-register-hints/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/484-checker-register-hints/expected.txt
diff --git a/test/484-checker-register-hints/info.txt b/test/484-checker-register-hints/info.txt
new file mode 100644
index 0000000..8923680
--- /dev/null
+++ b/test/484-checker-register-hints/info.txt
@@ -0,0 +1,4 @@
+Checks that the register allocator does not punish other
+blocks because one block forced spilling. The block that
+forces the spilling should restore the registers at the merge
+point.
diff --git a/test/484-checker-register-hints/src/Main.java b/test/484-checker-register-hints/src/Main.java
new file mode 100644
index 0000000..33952d9
--- /dev/null
+++ b/test/484-checker-register-hints/src/Main.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+ // CHECK-START: void Main.test1(boolean, int, int, int, int, int) register (after)
+ // CHECK: name "B0"
+ // CHECK-NOT: ParallelMove
+ // CHECK: name "B1"
+ // CHECK-NOT: end_block
+ // CHECK: If
+ // CHECK-NOT: ParallelMove
+ // CHECK: name "B3"
+ // CHECK-NOT: end_block
+ // CHECK: ArraySet
+ // We could check here that there is a parallel move, but it's only valid
+ // for some architectures (for example x86), as other architectures may
+ // not do a move at all.
+ // CHECK: end_block
+ // CHECK-NOT: ParallelMove
+
+ public static void test1(boolean z, int a, int b, int c, int d, int m) {
+ int e = live1;
+ int f = live2;
+ int g = live3;
+ if (z) {
+ } else {
+ // Create enough live instructions to force spilling on x86.
+ int h = live4;
+ int i = live5;
+ array[2] = e + i + h;
+ array[3] = f + i + h;
+ array[4] = g + i + h;
+ array[0] = h;
+ array[1] = i + h;
+
+ }
+ live1 = e + f + g;
+ }
+
+ // CHECK-START: void Main.test2(boolean, int, int, int, int, int) register (after)
+ // CHECK: name "B0"
+ // CHECK-NOT: ParallelMove
+ // CHECK: name "B1"
+ // CHECK-NOT: end_block
+ // CHECK: If
+ // CHECK-NOT: ParallelMove
+ // CHECK: name "B3"
+ // CHECK-NOT: end_block
+ // CHECK: ArraySet
+ // We could check here that there is a parallel move, but it's only valid
+ // for some architectures (for example x86), as other architectures may
+ // not do a move at all.
+ // CHECK: end_block
+ // CHECK-NOT: ParallelMove
+
+ public static void test2(boolean z, int a, int b, int c, int d, int m) {
+ int e = live1;
+ int f = live2;
+ int g = live3;
+ if (z) {
+ if (y) {
+ int h = live4;
+ int i = live5;
+ array[2] = e + i + h;
+ array[3] = f + i + h;
+ array[4] = g + i + h;
+ array[0] = h;
+ array[1] = i + h;
+ }
+ }
+ live1 = e + f + g;
+ }
+
+ // CHECK-START: void Main.test3(boolean, int, int, int, int, int) register (after)
+ // CHECK: name "B0"
+ // CHECK-NOT: ParallelMove
+ // CHECK: name "B1"
+ // CHECK-NOT: end_block
+ // CHECK: If
+ // CHECK-NOT: ParallelMove
+ // CHECK: name "B6"
+ // CHECK-NOT: end_block
+ // CHECK: ArraySet
+ // We could check here that there is a parallel move, but it's only valid
+ // for some architectures (for example x86), as other architectures may
+ // not do a move at all.
+ // CHECK: end_block
+ // CHECK-NOT: ParallelMove
+
+ public static void test3(boolean z, int a, int b, int c, int d, int m) {
+ // Same version as test2, but with branches reversed, to ensure that,
+ // whatever linear order is computed, we get the same results.
+ int e = live1;
+ int f = live2;
+ int g = live3;
+ if (z) {
+ live1 = e;
+ } else {
+ if (y) {
+ live1 = e;
+ } else {
+ int h = live4;
+ int i = live5;
+ array[2] = e + i + h;
+ array[3] = f + i + h;
+ array[4] = g + i + h;
+ array[0] = h;
+ array[1] = i + h;
+ }
+ }
+ live1 = e + f + g;
+ }
+
+ public static void main(String[] args) {
+ }
+
+ static boolean y;
+ static int live1;
+ static int live2;
+ static int live3;
+ static int live4;
+ static int live5;
+ static int[] array;
+}