Merge "Revert "Add implicit null and stack checks for x86""
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 1890181..3e5cdba 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -23,7 +23,6 @@
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 
-#include "cutils/atomic-inline.h"
 #include "cutils/trace.h"
 
 #include "base/stringprintf.h"
@@ -152,20 +151,20 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state >= 0)) {
       // Add as an extra reader.
-      done = android_atomic_acquire_cas(cur_state, cur_state + 1, &state_) == 0;
+      done = state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
     } else {
       // Owner holds it exclusively, hang up.
       ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
-      android_atomic_inc(&num_pending_readers_);
-      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+      ++num_pending_readers_;
+      if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
         if (errno != EAGAIN) {
           PLOG(FATAL) << "futex wait failed for " << name_;
         }
       }
-      android_atomic_dec(&num_pending_readers_);
+      --num_pending_readers_;
     }
   } while (!done);
 #else
@@ -184,14 +183,18 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state > 0)) {
-      // Reduce state by 1.
-      done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0;
-      if (done && (cur_state - 1) == 0) {  // cas may fail due to noise?
-        if (num_pending_writers_.LoadRelaxed() > 0 || num_pending_readers_ > 0) {
+      // Reduce state by 1 and impose lock release load/store ordering.
+      // Note: the relaxed loads below mustn't be reordered before the CompareExchange.
+      // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+      // a status bit into the state on contention.
+      done = state_.CompareExchangeWeakSequentiallyConsistent(cur_state, cur_state - 1);
+      if (done && (cur_state - 1) == 0) {  // Weak CAS may fail spuriously.
+        if (num_pending_writers_.LoadRelaxed() > 0 ||
+            num_pending_readers_.LoadRelaxed() > 0) {
           // Wake any exclusive waiters as there are now no readers.
-          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+          futex(state_.Address(), FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
     } else {
@@ -233,7 +236,7 @@
 
 inline uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
 #if ART_USE_FUTEXES
-  int32_t state = state_;
+  int32_t state = state_.LoadRelaxed();
   if (state == 0) {
     return 0;  // No owner.
   } else if (state > 0) {
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index fd1eb12..bde2886 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -262,7 +262,7 @@
 Mutex::Mutex(const char* name, LockLevel level, bool recursive)
     : BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
 #if ART_USE_FUTEXES
-  state_ = 0;
+  DCHECK_EQ(0, state_.LoadRelaxed());
   DCHECK_EQ(0, num_contenders_.LoadRelaxed());
 #else
   CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
@@ -272,13 +272,13 @@
 
 Mutex::~Mutex() {
 #if ART_USE_FUTEXES
-  if (state_ != 0) {
+  if (state_.LoadRelaxed() != 0) {
     Runtime* runtime = Runtime::Current();
     bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
-    CHECK_EQ(num_contenders_.LoadRelaxed(), 0)
+    CHECK_EQ(num_contenders_.LoadSequentiallyConsistent(), 0)
         << "unexpectedly found a contender on mutex " << name_;
   }
 #else
@@ -305,15 +305,15 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_;
+      int32_t cur_state = state_.LoadRelaxed();
       if (LIKELY(cur_state == 0)) {
-        // Change state from 0 to 1.
-        done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */);
+        // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
+        done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, 1 /* new state */);
       } else {
         // Failed to acquire, hang up.
         ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
         num_contenders_++;
-        if (futex(&state_, FUTEX_WAIT, 1, NULL, NULL, 0) != 0) {
+        if (futex(state_.Address(), FUTEX_WAIT, 1, NULL, NULL, 0) != 0) {
           // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
           // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
           if ((errno != EAGAIN) && (errno != EINTR)) {
@@ -323,11 +323,7 @@
         num_contenders_--;
       }
     } while (!done);
-    // We assert that no memory fence is needed here, since
-    // __sync_bool_compare_and_swap includes it.
-    // TODO: Change state_ to be a art::Atomic and use an intention revealing CAS operation
-    // that exposes the ordering semantics.
-    DCHECK_EQ(state_, 1);
+    DCHECK_EQ(state_.LoadRelaxed(), 1);
 #else
     CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
 #endif
@@ -352,16 +348,15 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_;
+      int32_t cur_state = state_.LoadRelaxed();
       if (cur_state == 0) {
-        // Change state from 0 to 1.
-        done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */);
+        // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
+        done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, 1 /* new state */);
       } else {
         return false;
       }
     } while (!done);
-    // We again assert no memory fence is needed.
-    DCHECK_EQ(state_, 1);
+    DCHECK_EQ(state_.LoadRelaxed(), 1);
 #else
     int result = pthread_mutex_trylock(&mutex_);
     if (result == EBUSY) {
@@ -399,17 +394,19 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_;
+      int32_t cur_state = state_.LoadRelaxed();
       if (LIKELY(cur_state == 1)) {
-        // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
         // We're no longer the owner.
         exclusive_owner_ = 0;
-        // Change state to 0.
-        done =  __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
+        // Change state to 0 and impose load/store ordering appropriate for lock release.
+        // Note: the relaxed loads below mustn't be reordered before the CompareExchange.
+        // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+        // a status bit into the state on contention.
+        done =  state_.CompareExchangeWeakSequentiallyConsistent(cur_state, 0 /* new state */);
         if (LIKELY(done)) {  // Spurious fail?
-          // Wake a contender
+          // Wake a contender.
           if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
-            futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+            futex(state_.Address(), FUTEX_WAKE, 1, NULL, NULL, 0);
           }
         }
       } else {
@@ -459,9 +456,9 @@
 
 ReaderWriterMutex::~ReaderWriterMutex() {
 #if ART_USE_FUTEXES
-  CHECK_EQ(state_, 0);
+  CHECK_EQ(state_.LoadRelaxed(), 0);
   CHECK_EQ(exclusive_owner_, 0U);
-  CHECK_EQ(num_pending_readers_, 0);
+  CHECK_EQ(num_pending_readers_.LoadRelaxed(), 0);
   CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0);
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
@@ -484,25 +481,25 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state == 0)) {
-      // Change state from 0 to -1.
-      done =  __sync_bool_compare_and_swap(&state_, 0 /* cur_state*/, -1 /* new state */);
+      // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
+      done =  state_.CompareExchangeWeakAcquire(0 /* cur_state*/, -1 /* new state */);
     } else {
       // Failed to acquire, hang up.
       ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-      num_pending_writers_++;
-      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+      ++num_pending_writers_;
+      if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
         // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
         // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
         if ((errno != EAGAIN) && (errno != EINTR)) {
           PLOG(FATAL) << "futex wait failed for " << name_;
         }
       }
-      num_pending_writers_--;
+      --num_pending_writers_;
     }
   } while (!done);
-  DCHECK_EQ(state_, -1);
+  DCHECK_EQ(state_.LoadRelaxed(), -1);
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
 #endif
@@ -520,16 +517,20 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (LIKELY(cur_state == -1)) {
       // We're no longer the owner.
       exclusive_owner_ = 0;
-      // Change state from -1 to 0.
-      done =  __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */);
-      if (LIKELY(done)) {  // cmpxchg may fail due to noise?
+      // Change state from -1 to 0 and impose load/store ordering appropriate for lock release.
+      // Note: the relaxed loads below mustn't be reordered before the CompareExchange.
+      // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+      // a status bit into the state on contention.
+      done =  state_.CompareExchangeWeakSequentiallyConsistent(-1 /* cur_state*/, 0 /* new state */);
+      if (LIKELY(done)) {  // Weak CAS may fail spuriously.
         // Wake any waiters.
-        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_.LoadRelaxed() > 0)) {
-          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+        if (UNLIKELY(num_pending_readers_.LoadRelaxed() > 0 ||
+                     num_pending_writers_.LoadRelaxed() > 0)) {
+          futex(state_.Address(), FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
     } else {
@@ -550,10 +551,10 @@
   timespec end_abs_ts;
   InitTimeSpec(true, CLOCK_REALTIME, ms, ns, &end_abs_ts);
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (cur_state == 0) {
-      // Change state from 0 to -1.
-      done =  __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, -1 /* new state */);
+      // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
+      done =  state_.CompareExchangeWeakAcquire(0 /* cur_state */, -1 /* new state */);
     } else {
       // Failed to acquire, hang up.
       timespec now_abs_ts;
@@ -563,10 +564,10 @@
         return false;  // Timed out.
       }
       ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-      num_pending_writers_++;
-      if (futex(&state_, FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) {
+      ++num_pending_writers_;
+      if (futex(state_.Address(), FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) {
         if (errno == ETIMEDOUT) {
-          num_pending_writers_--;
+          --num_pending_writers_;
           return false;  // Timed out.
         } else if ((errno != EAGAIN) && (errno != EINTR)) {
           // EAGAIN and EINTR both indicate a spurious failure,
@@ -575,7 +576,7 @@
           PLOG(FATAL) << "timed futex wait failed for " << name_;
         }
       }
-      num_pending_writers_--;
+      --num_pending_writers_;
     }
   } while (!done);
 #else
@@ -602,10 +603,10 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_;
+    int32_t cur_state = state_.LoadRelaxed();
     if (cur_state >= 0) {
-      // Add as an extra reader.
-      done =  __sync_bool_compare_and_swap(&state_, cur_state, cur_state + 1);
+      // Add as an extra reader and impose load/store ordering appropriate for lock acquisition.
+      done =  state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
     } else {
       // Owner holds it exclusively.
       return false;
@@ -702,7 +703,7 @@
       // mutex unlocks will awaken the requeued waiter thread.
       done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0,
                    reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()),
-                   &guard_.state_, cur_sequence) != -1;
+                   guard_.state_.Address(), cur_sequence) != -1;
       if (!done) {
         if (errno != EAGAIN) {
           PLOG(FATAL) << "futex cmp requeue failed for " << name_;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 81e62ab..9dc7dea 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -226,7 +226,8 @@
   }
   void AssertNotHeld(const Thread* self) { AssertNotHeldExclusive(self); }
 
-  // Id associated with exclusive owner.
+  // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
+  // than the owner.
   uint64_t GetExclusiveOwnerTid() const;
 
   // Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
@@ -239,7 +240,7 @@
  private:
 #if ART_USE_FUTEXES
   // 0 is unheld, 1 is held.
-  volatile int32_t state_;
+  AtomicInteger state_;
   // Exclusive owner.
   volatile uint64_t exclusive_owner_;
   // Number of waiting contenders.
@@ -343,7 +344,8 @@
     }
   }
 
-  // Id associated with exclusive owner.
+  // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
+  // than the owner.
   uint64_t GetExclusiveOwnerTid() const;
 
   virtual void Dump(std::ostream& os) const;
@@ -351,12 +353,12 @@
  private:
 #if ART_USE_FUTEXES
   // -1 implies held exclusive, +ve shared held by state_ many owners.
-  volatile int32_t state_;
-  // Exclusive owner.
+  AtomicInteger state_;
+  // Exclusive owner. Modification guarded by this mutex.
   volatile uint64_t exclusive_owner_;
-  // Pending readers.
-  volatile int32_t num_pending_readers_;
-  // Pending writers.
+  // Number of contenders waiting for a reader share.
+  AtomicInteger num_pending_readers_;
+  // Number of contenders waiting to be the writer.
   AtomicInteger num_pending_writers_;
 #else
   pthread_rwlock_t rwlock_;
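
A minimal illustrative sketch (not part of this change) of the acquire/release CAS pattern the
patch moves state_ to, using std::atomic<int32_t> as a stand-in for art::Atomic; futex waiting
and contender accounting are omitted and the class/method names here are hypothetical.

#include <atomic>
#include <cstdint>

class SpinMutexSketch {
 public:
  void Lock() {
    bool done = false;
    do {
      int32_t cur_state = state_.load(std::memory_order_relaxed);
      if (cur_state == 0) {
        // 0 -> 1 with acquire ordering, mirroring CompareExchangeWeakAcquire above.
        done = state_.compare_exchange_weak(cur_state, 1, std::memory_order_acquire);
      }
      // A real implementation would futex-wait here instead of spinning when cur_state != 0.
    } while (!done);
  }

  void Unlock() {
    int32_t cur_state = 1;
    // 1 -> 0; sequential consistency keeps later relaxed loads of waiter counts from being
    // reordered before the exchange, as the comments in the patch require.
    while (!state_.compare_exchange_weak(cur_state, 0, std::memory_order_seq_cst)) {
      cur_state = 1;  // Weak CAS may fail spuriously; retry.
    }
    // A real implementation would futex-wake a pending contender here.
  }

 private:
  std::atomic<int32_t> state_{0};  // 0 is unheld, 1 is held.
};
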
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index fdbc9c2..289dc1d 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -26,7 +26,7 @@
 #include <fstream>
 #include <memory>
 
-#include "../../external/icu4c/common/unicode/uvernum.h"
+#include "../../external/icu/icu4c/source/common/unicode/uvernum.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index a1d001e..ad0a4f43 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -17,9 +17,9 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
 #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
 
+#include "atomic.h"
 #include "base/logging.h"
 #include "card_table.h"
-#include "cutils/atomic-inline.h"
 #include "space_bitmap.h"
 #include "utils.h"
 
@@ -28,18 +28,23 @@
 namespace accounting {
 
 static inline bool byte_cas(byte old_value, byte new_value, byte* address) {
+#if defined(__i386__) || defined(__x86_64__)
+  Atomic<byte>* byte_atomic = reinterpret_cast<Atomic<byte>*>(address);
+  return byte_atomic->CompareExchangeWeakRelaxed(old_value, new_value);
+#else
   // Little endian means most significant byte is on the left.
   const size_t shift_in_bytes = reinterpret_cast<uintptr_t>(address) % sizeof(uintptr_t);
   // Align the address down.
   address -= shift_in_bytes;
   const size_t shift_in_bits = shift_in_bytes * kBitsPerByte;
-  int32_t* word_address = reinterpret_cast<int32_t*>(address);
+  AtomicInteger* word_atomic = reinterpret_cast<AtomicInteger*>(address);
+
   // Word with the byte we are trying to cas cleared.
-  const int32_t cur_word = *word_address & ~(0xFF << shift_in_bits);
+  const int32_t cur_word = word_atomic->LoadRelaxed() & ~(0xFF << shift_in_bits);
   const int32_t old_word = cur_word | (static_cast<int32_t>(old_value) << shift_in_bits);
   const int32_t new_word = cur_word | (static_cast<int32_t>(new_value) << shift_in_bits);
-  bool success = android_atomic_cas(old_word, new_word, word_address) == 0;
-  return success;
+  return word_atomic->CompareExchangeWeakRelaxed(old_word, new_word);
+#endif
 }
 
 template <typename Visitor>
@@ -174,8 +179,8 @@
       for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
         new_bytes[i] = visitor(expected_bytes[i]);
       }
-      if (LIKELY(android_atomic_cas(expected_word, new_word,
-                                    reinterpret_cast<int32_t*>(word_cur)) == 0)) {
+      Atomic<uintptr_t>* atomic_word = reinterpret_cast<Atomic<uintptr_t>*>(word_cur);
+      if (LIKELY(atomic_word->CompareExchangeWeakRelaxed(expected_word, new_word))) {
         for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
           const byte expected_byte = expected_bytes[i];
           const byte new_byte = new_bytes[i];
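
For illustration only (not part of the patch): the non-x86 path of byte_cas() above emulates a
byte-wide CAS by CASing the 32-bit word containing the byte. A self-contained sketch with
std::atomic, assuming a little-endian target as the original code does; as in the original, a
concurrent change to a neighboring byte makes the CAS fail, so callers are expected to retry.

#include <atomic>
#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the word-based fallback in byte_cas().
static bool ByteCasViaWord(uint8_t old_value, uint8_t new_value, uint8_t* address) {
  const size_t shift_in_bytes = reinterpret_cast<uintptr_t>(address) % sizeof(uint32_t);
  uint8_t* aligned = address - shift_in_bytes;  // Align down to the containing word.
  const uint32_t shift_in_bits = static_cast<uint32_t>(shift_in_bytes * 8);
  std::atomic<uint32_t>* word = reinterpret_cast<std::atomic<uint32_t>*>(aligned);
  // Word with the byte being CASed cleared, then splice in the expected and desired bytes.
  const uint32_t cur_word = word->load(std::memory_order_relaxed) & ~(0xFFu << shift_in_bits);
  uint32_t old_word = cur_word | (static_cast<uint32_t>(old_value) << shift_in_bits);
  const uint32_t new_word = cur_word | (static_cast<uint32_t>(new_value) << shift_in_bits);
  return word->compare_exchange_weak(old_word, new_word, std::memory_order_relaxed);
}
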
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 09fb97a..722576f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -159,7 +159,7 @@
     if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
       // There is a free page run at the end.
       DCHECK(last_free_page_run->IsFree());
-      DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+      DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
       last_free_page_run_size = last_free_page_run->ByteSize(this);
     } else {
       // There is no free page run at the end.
@@ -248,7 +248,7 @@
     // Update the page map.
     size_t page_map_idx = ToPageMapIndex(res);
     for (size_t i = 0; i < num_pages; i++) {
-      DCHECK_EQ(page_map_[page_map_idx + i], kPageMapEmpty);
+      DCHECK(IsFreePage(page_map_idx + i));
     }
     switch (page_map_type) {
     case kPageMapRun:
@@ -301,8 +301,7 @@
     pm_part_type = kPageMapLargeObjectPart;
     break;
   default:
-    pm_part_type = kPageMapEmpty;
-    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+    LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
                << static_cast<int>(pm_type) << ", ptr=" << std::hex
                << reinterpret_cast<intptr_t>(ptr);
     return 0;
@@ -330,7 +329,7 @@
   }
 
   if (kTraceRosAlloc) {
-    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+    LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
               << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
               << "(" << std::dec << (num_pages * kPageSize) << ")";
   }
@@ -347,7 +346,7 @@
   if (!free_page_runs_.empty()) {
     // Try to coalesce in the higher address direction.
     if (kTraceRosAlloc) {
-      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+      LOG(INFO) << __PRETTY_FUNCTION__ << " : trying to coalesce a free page run 0x"
                 << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
                 << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
                 << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
@@ -497,27 +496,27 @@
                 << ", page_map_entry=" << static_cast<int>(page_map_entry);
     }
     switch (page_map_[pm_idx]) {
-      case kPageMapEmpty:
-        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-        return 0;
       case kPageMapLargeObject:
         return FreePages(self, ptr, false);
       case kPageMapLargeObjectPart:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
         return 0;
-      case kPageMapRun:
       case kPageMapRunPart: {
-        size_t pi = pm_idx;
-        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
         // Find the beginning of the run.
-        while (page_map_[pi] != kPageMapRun) {
-          pi--;
-          DCHECK_LT(pi, capacity_ / kPageSize);
-        }
-        DCHECK_EQ(page_map_[pi], kPageMapRun);
-        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        do {
+          --pm_idx;
+          DCHECK_LT(pm_idx, capacity_ / kPageSize);
+        } while (page_map_[pm_idx] != kPageMapRun);
+        // Fall-through.
+      case kPageMapRun:
+        run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
         DCHECK_EQ(run->magic_num_, kMagicNum);
         break;
+      case kPageMapReleased:
+        // Fall-through.
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return 0;
       }
       default:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
@@ -594,7 +593,8 @@
     if (kIsDebugBuild && current_run != dedicated_full_run_) {
       full_runs_[idx].insert(current_run);
       if (kTraceRosAlloc) {
-        LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(current_run)
                   << " into full_runs_[" << std::dec << idx << "]";
       }
       DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
@@ -1358,6 +1358,8 @@
   for (size_t i = 0; i < end; ++i) {
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall-through.
       case kPageMapEmpty: {
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
         if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
@@ -1370,8 +1372,8 @@
           curr_fpr_size = fpr->ByteSize(this);
           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
-          stream << "[" << i << "]=Empty (FPR start)"
-                 << " fpr_size=" << curr_fpr_size
+          stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
+                 << " (FPR start) fpr_size=" << curr_fpr_size
                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
@@ -1441,43 +1443,46 @@
   size_t pm_idx = RoundDownToPageMapIndex(ptr);
   MutexLock mu(Thread::Current(), lock_);
   switch (page_map_[pm_idx]) {
-  case kPageMapEmpty:
-    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
-               << reinterpret_cast<intptr_t>(ptr);
-    break;
-  case kPageMapLargeObject: {
-    size_t num_pages = 1;
-    size_t idx = pm_idx + 1;
-    size_t end = page_map_size_;
-    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
-      num_pages++;
-      idx++;
+    case kPageMapReleased:
+      // Fall-through.
+    case kPageMapEmpty:
+      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+                 << std::hex << reinterpret_cast<intptr_t>(ptr);
+      break;
+    case kPageMapLargeObject: {
+      size_t num_pages = 1;
+      size_t idx = pm_idx + 1;
+      size_t end = page_map_size_;
+      while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+        num_pages++;
+        idx++;
+      }
+      return num_pages * kPageSize;
     }
-    return num_pages * kPageSize;
-  }
-  case kPageMapLargeObjectPart:
-    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
-               << reinterpret_cast<intptr_t>(ptr);
-    break;
-  case kPageMapRun:
-  case kPageMapRunPart: {
-    // Find the beginning of the run.
-    while (page_map_[pm_idx] != kPageMapRun) {
-      pm_idx--;
-      DCHECK_LT(pm_idx, capacity_ / kPageSize);
+    case kPageMapLargeObjectPart:
+      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+                 << std::hex << reinterpret_cast<intptr_t>(ptr);
+      break;
+    case kPageMapRun:
+    case kPageMapRunPart: {
+      // Find the beginning of the run.
+      while (page_map_[pm_idx] != kPageMapRun) {
+        pm_idx--;
+        DCHECK_LT(pm_idx, capacity_ / kPageSize);
+      }
+      DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
+      Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+      DCHECK_EQ(run->magic_num_, kMagicNum);
+      size_t idx = run->size_bracket_idx_;
+      size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+          - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+      DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+      return IndexToBracketSize(idx);
     }
-    DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
-    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
-    DCHECK_EQ(run->magic_num_, kMagicNum);
-    size_t idx = run->size_bracket_idx_;
-    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
-        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
-    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
-    return IndexToBracketSize(idx);
-  }
-  default:
-    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-    break;
+    default: {
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+      break;
+    }
   }
   return 0;
 }
@@ -1490,7 +1495,7 @@
   if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
     // Remove the last free page run, if any.
     DCHECK(last_free_page_run->IsFree());
-    DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+    DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
     DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
     DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
     free_page_runs_.erase(last_free_page_run);
@@ -1500,7 +1505,7 @@
     size_t new_num_of_pages = new_footprint / kPageSize;
     DCHECK_GE(page_map_size_, new_num_of_pages);
     // Zero out the tail of the page map.
-    byte* zero_begin = page_map_ + new_num_of_pages;
+    byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
     byte* madvise_begin = AlignUp(zero_begin, kPageSize);
     DCHECK_LE(madvise_begin, page_map_mem_map_->End());
     size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
@@ -1543,6 +1548,8 @@
   while (i < pm_end) {
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall-through.
       case kPageMapEmpty: {
         // The start of a free page run.
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1560,7 +1567,7 @@
         size_t num_pages = fpr_size / kPageSize;
         if (kIsDebugBuild) {
           for (size_t j = i + 1; j < i + num_pages; ++j) {
-            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+            DCHECK(IsFreePage(j));
           }
         }
         i += fpr_size / kPageSize;
@@ -1672,7 +1679,7 @@
       full_runs_[idx].insert(run);
       DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
       if (kTraceRosAlloc) {
-        LOG(INFO) << __FUNCTION__  << " : Inserted run 0x" << std::hex
+        LOG(INFO) << __PRETTY_FUNCTION__  << " : Inserted run 0x" << std::hex
                   << reinterpret_cast<intptr_t>(run)
                   << " into full_runs_[" << std::dec << idx << "]";
       }
@@ -1685,7 +1692,7 @@
     non_full_runs_[idx].insert(run);
     DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
     if (kTraceRosAlloc) {
-      LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex
+      LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                 << reinterpret_cast<intptr_t>(run)
                 << " into non_full_runs_[" << std::dec << idx << "]";
     }
@@ -1865,7 +1872,7 @@
 void RosAlloc::Verify() {
   Thread* self = Thread::Current();
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
-      << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
+      << "The mutator lock isn't exclusively locked at " << __PRETTY_FUNCTION__;
   MutexLock mu(self, *Locks::thread_list_lock_);
   ReaderMutexLock wmu(self, bulk_free_lock_);
   std::vector<Run*> runs;
@@ -1876,6 +1883,8 @@
     while (i < pm_end) {
       byte pm = page_map_[i];
       switch (pm) {
+        case kPageMapReleased:
+          // Fall-through.
         case kPageMapEmpty: {
           // The start of a free page run.
           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1889,7 +1898,7 @@
           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
               << "A free page run size must be > 0 : " << fpr_size;
           for (size_t j = i + 1; j < i + num_pages; ++j) {
-            CHECK_EQ(page_map_[j], kPageMapEmpty)
+            CHECK(IsFreePage(j))
                 << "A mismatch between the page map table for kPageMapEmpty "
                 << " at page index " << j
                 << " and the free page run size : page index range : "
@@ -2097,48 +2106,36 @@
   Thread* self = Thread::Current();
   size_t reclaimed_bytes = 0;
   size_t i = 0;
-  while (true) {
-    MutexLock mu(self, lock_);
-    // Check the page map size which might have changed due to grow/shrink.
-    size_t pm_end = page_map_size_;
-    if (i >= pm_end) {
-      // Reached the end.
-      break;
-    }
+  // Check the page map size which might have changed due to grow/shrink.
+  while (i < page_map_size_) {
+    // Reading the page map without a lock is racy but the race is benign since it should only
+    // result in occasionally not releasing pages which we could release.
     byte pm = page_map_[i];
     switch (pm) {
       case kPageMapEmpty: {
-        // The start of a free page run. Release pages.
-        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
-        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
-        size_t fpr_size = fpr->ByteSize(this);
-        DCHECK(IsAligned<kPageSize>(fpr_size));
-        byte* start = reinterpret_cast<byte*>(fpr);
-        if (kIsDebugBuild) {
-          // In the debug build, the first page of a free page run
-          // contains a magic number for debugging. Exclude it.
-          start = reinterpret_cast<byte*>(fpr) + kPageSize;
+        // Only lock if we have an empty page since we want to prevent other threads racing in.
+        MutexLock mu(self, lock_);
+        // Check that it's still empty after we acquired the lock since another thread could have
+        // raced in and placed an allocation here.
+        pm = page_map_[i];
+        if (LIKELY(pm == kPageMapEmpty)) {
+          // The start of a free page run. Release pages.
+          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+          DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+          size_t fpr_size = fpr->ByteSize(this);
+          DCHECK(IsAligned<kPageSize>(fpr_size));
+          byte* start = reinterpret_cast<byte*>(fpr);
+          reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
+          i += fpr_size / kPageSize;
+          DCHECK_LE(i, page_map_size_);
         }
-        byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
-        if (!kMadviseZeroes) {
-          memset(start, 0, end - start);
-        }
-        CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
-        reclaimed_bytes += fpr_size;
-        size_t num_pages = fpr_size / kPageSize;
-        if (kIsDebugBuild) {
-          for (size_t j = i + 1; j < i + num_pages; ++j) {
-            DCHECK_EQ(page_map_[j], kPageMapEmpty);
-          }
-        }
-        i += num_pages;
-        DCHECK_LE(i, pm_end);
         break;
       }
       case kPageMapLargeObject:      // Fall through.
       case kPageMapLargeObjectPart:  // Fall through.
       case kPageMapRun:              // Fall through.
       case kPageMapRunPart:          // Fall through.
+      case kPageMapReleased:         // Fall through since it is already released.
         ++i;
         break;  // Skip.
       default:
@@ -2149,6 +2146,35 @@
   return reclaimed_bytes;
 }
 
+size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
+  DCHECK_ALIGNED(start, kPageSize);
+  DCHECK_ALIGNED(end, kPageSize);
+  DCHECK_LT(start, end);
+  if (kIsDebugBuild) {
+    // In the debug build, the first page of a free page run
+    // contains a magic number for debugging. Exclude it.
+    start += kPageSize;
+  }
+  if (!kMadviseZeroes) {
+    // TODO: Do this when we resurrect the page instead.
+    memset(start, 0, end - start);
+  }
+  CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
+  size_t pm_idx = ToPageMapIndex(start);
+  size_t reclaimed_bytes = 0;
+  // Calculate reclaimed bytes and update the page map.
+  const size_t max_idx = pm_idx + (end - start) / kPageSize;
+  for (; pm_idx < max_idx; ++pm_idx) {
+    DCHECK(IsFreePage(pm_idx));
+    if (page_map_[pm_idx] == kPageMapEmpty) {
+      // Mark the page as released and update how many bytes we released.
+      reclaimed_bytes += kPageSize;
+      page_map_[pm_idx] = kPageMapReleased;
+    }
+  }
+  return reclaimed_bytes;
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
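
An illustrative sketch of the mechanism the new ReleasePageRange() builds on (not ART code;
everything except madvise/MADV_DONTNEED is an assumption for illustration): for a private
anonymous mapping, MADV_DONTNEED lets the kernel reclaim the physical pages, and later reads
observe zero-filled memory, which is what allows the bookkeeping above to mark the pages
kPageMapReleased.

#include <sys/mman.h>
#include <cassert>
#include <cstdint>
#include <cstring>

// Release the physical pages backing [start, end); returns the number of bytes advised away.
static size_t ReleaseRangeSketch(uint8_t* start, uint8_t* end, size_t page_size,
                                 bool kernel_zeroes_on_madvise) {
  assert(reinterpret_cast<uintptr_t>(start) % page_size == 0);
  assert(reinterpret_cast<uintptr_t>(end) % page_size == 0);
  assert(start < end);
  if (!kernel_zeroes_on_madvise) {
    // Mirror the !kMadviseZeroes path: clear explicitly if madvise won't zero-fill for us.
    memset(start, 0, end - start);
  }
  // Tell the kernel it may reclaim these pages; later accesses see zero-filled memory.
  if (madvise(start, static_cast<size_t>(end - start), MADV_DONTNEED) != 0) {
    return 0;
  }
  return static_cast<size_t>(end - start);
}
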
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 13f61ec..fad0dc8 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -99,27 +99,8 @@
       byte* start = reinterpret_cast<byte*>(this);
       size_t byte_size = ByteSize(rosalloc);
       DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
-      bool release_pages = ShouldReleasePages(rosalloc);
-      if (kIsDebugBuild) {
-        // Exclude the first page that stores the magic number.
-        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
-        start += kPageSize;
-        byte_size -= kPageSize;
-        if (byte_size > 0) {
-          if (release_pages) {
-            if (!kMadviseZeroes) {
-              memset(start, 0, byte_size);
-            }
-            madvise(start, byte_size, MADV_DONTNEED);
-          }
-        }
-      } else {
-        if (release_pages) {
-          if (!kMadviseZeroes) {
-            memset(start, 0, byte_size);
-          }
-          madvise(start, byte_size, MADV_DONTNEED);
-        }
+      if (ShouldReleasePages(rosalloc)) {
+        rosalloc->ReleasePageRange(start, start + byte_size);
       }
     }
   };
@@ -462,14 +443,15 @@
   std::string size_bracket_lock_names[kNumOfSizeBrackets];
   // The types of page map entries.
   enum {
-    kPageMapEmpty           = 0,  // Not allocated.
-    kPageMapRun             = 1,  // The beginning of a run.
-    kPageMapRunPart         = 2,  // The non-beginning part of a run.
-    kPageMapLargeObject     = 3,  // The beginning of a large object.
-    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+    kPageMapReleased = 0,     // Zero and released back to the OS.
+    kPageMapEmpty,            // Zero but probably dirty.
+    kPageMapRun,              // The beginning of a run.
+    kPageMapRunPart,          // The non-beginning part of a run.
+    kPageMapLargeObject,      // The beginning of a large object.
+    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
   };
   // The table that indicates what pages are currently used for.
-  byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
+  volatile byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
   size_t page_map_size_;
   size_t max_page_map_size_;
   std::unique_ptr<MemMap> page_map_mem_map_;
@@ -536,6 +518,9 @@
   // Revoke the current runs which share an index with the thread local runs.
   void RevokeThreadUnsafeCurrentRuns();
 
+  // Release a range of pages.
+  size_t ReleasePageRange(byte* start, byte* end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
  public:
   RosAlloc(void* base, size_t capacity, size_t max_capacity,
            PageReleaseMode page_release_mode,
@@ -588,6 +573,11 @@
   static Run* GetDedicatedFullRun() {
     return dedicated_full_run_;
   }
+  bool IsFreePage(size_t idx) const {
+    DCHECK_LT(idx, capacity_ / kPageSize);
+    byte pm_type = page_map_[idx];
+    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
+  }
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 696728b..e9adca0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,7 +114,7 @@
       desired_collector_type_(foreground_collector_type_),
       heap_trim_request_lock_(nullptr),
       last_trim_time_(0),
-      heap_transition_target_time_(0),
+      heap_transition_or_trim_target_time_(0),
       heap_trim_request_pending_(false),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
@@ -850,10 +850,10 @@
       MutexLock mu(self, *heap_trim_request_lock_);
       desired_collector_type = desired_collector_type_;
       uint64_t current_time = NanoTime();
-      if (current_time >= heap_transition_target_time_) {
+      if (current_time >= heap_transition_or_trim_target_time_) {
         break;
       }
-      wait_time = heap_transition_target_time_ - current_time;
+      wait_time = heap_transition_or_trim_target_time_ - current_time;
     }
     ScopedThreadStateChange tsc(self, kSleeping);
     usleep(wait_time / 1000);  // Usleep takes microseconds.
@@ -871,9 +871,9 @@
     VLOG(heap) << "Deflating " << count << " monitors took "
         << PrettyDuration(NanoTime() - start_time);
     runtime->GetThreadList()->ResumeAll();
-    // Do a heap trim if it is needed.
-    Trim();
   }
+  // Do a heap trim if it is needed.
+  Trim();
 }
 
 void Heap::Trim() {
@@ -904,9 +904,13 @@
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
-      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
-      total_alloc_space_size += alloc_space->Size();
-      managed_reclaimed += alloc_space->Trim();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
+        // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
+        // for a long period of time.
+        managed_reclaimed += malloc_space->Trim();
+      }
+      total_alloc_space_size += malloc_space->Size();
     }
   }
   total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
@@ -919,15 +923,18 @@
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
   size_t native_reclaimed = 0;
+  // Only trim the native heap if we don't care about pauses.
+  if (!CareAboutPauseTimes()) {
 #if defined(USE_DLMALLOC)
-  // Trim the native heap.
-  dlmalloc_trim(0);
-  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+    // Trim the native heap.
+    dlmalloc_trim(0);
+    dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
 #elif defined(USE_JEMALLOC)
-  // Jemalloc does it's own internal trimming.
+    // Jemalloc does its own internal trimming.
 #else
-  UNIMPLEMENTED(WARNING) << "Add trimming support";
+    UNIMPLEMENTED(WARNING) << "Add trimming support";
 #endif
+  }
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
@@ -2693,17 +2700,14 @@
     if (desired_collector_type_ == desired_collector_type) {
       return;
     }
-    heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time);
+    heap_transition_or_trim_target_time_ =
+        std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time);
     desired_collector_type_ = desired_collector_type;
   }
   SignalHeapTrimDaemon(self);
 }
 
 void Heap::RequestHeapTrim() {
-  // Request a heap trim only if we do not currently care about pause times.
-  if (CareAboutPauseTimes()) {
-    return;
-  }
   // GC completed and now we must decide whether to request a heap trim (advising pages back to the
   // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
   // a space it will hold its lock and can become a cause of jank.
@@ -2733,6 +2737,10 @@
       return;
     }
     heap_trim_request_pending_ = true;
+    uint64_t current_time = NanoTime();
+    if (heap_transition_or_trim_target_time_ < current_time) {
+      heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait;
+    }
   }
   // Notify the daemon thread which will actually do the heap trim.
   SignalHeapTrimDaemon(self);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6d70a38..c9ea03e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -769,8 +769,8 @@
   Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // When we want to perform the next heap trim (nano seconds).
   uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_);
-  // When we want to perform the next heap transition (nano seconds).
-  uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_);
+  // When we want to perform the next heap transition or heap trim (in nanoseconds).
+  uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_);
   // If we have a heap trim request pending.
   bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_);
 
@@ -981,6 +981,7 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
+  friend class ScopedHeapFill;
   friend class ScopedHeapLock;
   friend class space::SpaceTest;
 
@@ -997,6 +998,25 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
 
+// ScopedHeapFill changes the bytes allocated counter to be equal to the growth limit. This
+// causes the next allocation to perform a GC and possibly an OOM. It can be used to ensure that a
+// GC happens in specific methods such as ThrowIllegalMonitorStateExceptionF in Monitor::Wait.
+class ScopedHeapFill {
+ public:
+  explicit ScopedHeapFill(Heap* heap)
+      : heap_(heap),
+        delta_(heap_->GetMaxMemory() - heap_->GetBytesAllocated()) {
+    heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(delta_);
+  }
+  ~ScopedHeapFill() {
+    heap_->num_bytes_allocated_.FetchAndSubSequentiallyConsistent(delta_);
+  }
+
+ private:
+  Heap* const heap_;
+  const int64_t delta_;
+};
+
 }  // namespace gc
 }  // namespace art
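
A self-contained analogue of the ScopedHeapFill idea added above (illustrative only; the
std::atomic counter stands in for Heap::num_bytes_allocated_): the constructor inflates a
counter up to a limit and the destructor restores it, so the "heap is full" condition holds
exactly for the scope's lifetime and the next allocation takes the slow path.

#include <atomic>
#include <cstdint>

class ScopedCounterFill {
 public:
  ScopedCounterFill(std::atomic<int64_t>* counter, int64_t limit)
      : counter_(counter),
        delta_(limit - counter->load(std::memory_order_relaxed)) {
    counter_->fetch_add(delta_, std::memory_order_seq_cst);
  }
  ~ScopedCounterFill() {
    counter_->fetch_sub(delta_, std::memory_order_seq_cst);
  }

 private:
  std::atomic<int64_t>* const counter_;
  const int64_t delta_;
};
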
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index c7fb884..9f04b90 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -772,8 +772,13 @@
     // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
     Class* found = Runtime::Current()->GetClassLinker()->FindClass(
         self, descriptor.c_str(), NullHandle<mirror::ClassLoader>());
-    CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
-        << PrettyDescriptor(descriptor);
+    if (found == NULL) {
+      if (!self->IsExceptionPending()) {
+        AbortTransaction(self, "Class.forName failed in un-started runtime for class: %s",
+                         PrettyDescriptor(descriptor).c_str());
+      }
+      return;
+    }
     result->SetL(found);
   } else if (name == "java.lang.Class java.lang.Void.lookupType()") {
     result->SetL(Runtime::Current()->GetClassLinker()->FindPrimitiveClass('V'));
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 26e7d31..5a5805f 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -101,11 +101,6 @@
   }
 
   const struct sigaction& action = user_sigactions[sig].GetAction();
-
-  // Only deliver the signal if the signal was not masked out.
-  if (sigismember(&action.sa_mask, sig)) {
-     return;
-  }
   if ((action.sa_flags & SA_SIGINFO) == 0) {
     if (action.sa_handler != NULL) {
       action.sa_handler(sig);
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 2e1c6d9..f412034 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -61,6 +61,9 @@
     test_Memory_pokeShort();
     test_Memory_pokeInt();
     test_Memory_pokeLong();
+    test_AtomicBoolean_compareAndSet();
+    test_AtomicInteger_compareAndSet();
+    test_AtomicLong_compareAndSet();
   }
 
   /*
@@ -93,6 +96,60 @@
     Assert.assertNotNull(Thread.currentThread());
   }
 
+  /**
+   * Will test inlining of CAS, enabled by the inclusion of AtomicBoolean in core.oat.
+   */
+  public static void test_AtomicBoolean_compareAndSet() {
+    java.util.concurrent.atomic.AtomicBoolean ab = new java.util.concurrent.atomic.AtomicBoolean();
+    Assert.assertEquals(ab.compareAndSet(false, false), true);
+    Assert.assertEquals(ab.compareAndSet(true, false), false);
+    Assert.assertEquals(ab.compareAndSet(true, true), false);
+    Assert.assertEquals(ab.compareAndSet(false, true), true);
+    Assert.assertEquals(ab.compareAndSet(false, true), false);
+    Assert.assertEquals(ab.compareAndSet(false, false), false);
+    Assert.assertEquals(ab.compareAndSet(true, true), true);
+    Assert.assertEquals(ab.compareAndSet(true, false), true);
+    Assert.assertEquals(ab.compareAndSet(true, false), false);
+    Assert.assertEquals(ab.compareAndSet(true, true), false);
+    Assert.assertEquals(ab.compareAndSet(false, false), true);
+  }
+
+  /**
+   * Will test inlining of CAS, enabled by the inclusion of AtomicInteger in core.oat.
+   */
+  public static void test_AtomicInteger_compareAndSet() {
+    java.util.concurrent.atomic.AtomicInteger ab = new java.util.concurrent.atomic.AtomicInteger();
+    Assert.assertEquals(ab.compareAndSet(0, 0), true);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
+    Assert.assertEquals(ab.compareAndSet(0, 0x12345678), true);
+    Assert.assertEquals(ab.compareAndSet(0, 0x12345678), false);
+    Assert.assertEquals(ab.compareAndSet(0, 0), false);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), true);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), true);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
+    Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
+    Assert.assertEquals(ab.compareAndSet(0, 0), true);
+  }
+
+  /**
+   * Will test inlining of CAS, enabled by the inclusion of AtomicLong in core.oat.
+   */
+  public static void test_AtomicLong_compareAndSet() {
+    java.util.concurrent.atomic.AtomicLong ab = new java.util.concurrent.atomic.AtomicLong();
+    Assert.assertEquals(ab.compareAndSet(0L, 0L), true);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890L, 0L), false);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890L, 0x1234567890L), false);
+    Assert.assertEquals(ab.compareAndSet(0L, 0x1234567890L), true);
+    Assert.assertEquals(ab.compareAndSet(0L, 0x1234567890L), false);
+    Assert.assertEquals(ab.compareAndSet(0L, 0L), false);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890L, 0x1234567890L), true);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890L, 0L), true);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890L, 0L), false);
+    Assert.assertEquals(ab.compareAndSet(0x1234567890L, 0x1234567890L), false);
+    Assert.assertEquals(ab.compareAndSet(0L, 0L), true);
+  }
+
   public static void test_String_length() {
     String str0 = "";
     String str1 = "x";