Parellel mark stack processing
Enabled parallel mark stack processing by using a thread pool.
Optimized object scanning by removing dependent loads for IsClass.
Performance:
Prime: ~10% speedup of partial GC.
Nakasi: ~50% speedup of partial GC.
Change-Id: I43256a068efc47cb52d93108458ea18d4e02fccc
diff --git a/src/gc/space_bitmap.h b/src/gc/space_bitmap.h
index 885491f..25fd538 100644
--- a/src/gc/space_bitmap.h
+++ b/src/gc/space_bitmap.h
@@ -22,6 +22,8 @@
#include <stdint.h>
#include <vector>
+#include "cutils/atomic.h"
+#include "cutils/atomic-inline.h"
#include "UniquePtr.h"
#include "globals.h"
#include "logging.h"
@@ -64,12 +66,32 @@
return static_cast<uintptr_t>(kWordHighBitMask) >> ((offset_ / kAlignment) % kBitsPerWord);
}
- inline void Set(const Object* obj) {
- Modify(obj, true);
+ inline bool Set(const Object* obj) {
+ return Modify(obj, true);
}
- inline void Clear(const Object* obj) {
- Modify(obj, false);
+ inline bool Clear(const Object* obj) {
+ return Modify(obj, false);
+ }
+
+ // Returns true if the object was previously marked.
+ inline bool AtomicTestAndSet(const Object* obj) {
+ uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
+ DCHECK_GE(addr, heap_begin_);
+ const uintptr_t offset = addr - heap_begin_;
+ const size_t index = OffsetToIndex(offset);
+ const word mask = OffsetToMask(offset);
+ word* const address = &bitmap_begin_[index];
+ DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
+ word old_word;
+ do {
+ old_word = *address;
+ // Fast path: The bit is already set.
+ if ((old_word & mask) != 0) {
+ return true;
+ }
+ } while (UNLIKELY(android_atomic_cas(old_word, old_word | mask, address) != 0));
+ return false;
}
void Clear();
@@ -229,6 +251,12 @@
std::string GetName() const;
void SetName(const std::string& name);
+ const void* GetObjectWordAddress(const Object* obj) const {
+ uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
+ const uintptr_t offset = addr - heap_begin_;
+ const size_t index = OffsetToIndex(offset);
+ return &bitmap_begin_[index];
+ }
private:
// TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1,
// however, we document that this is expected on heap_end_
@@ -237,18 +265,21 @@
heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)),
name_(name) {}
- inline void Modify(const Object* obj, bool do_set) {
+ inline bool Modify(const Object* obj, bool do_set) {
uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
DCHECK_GE(addr, heap_begin_);
const uintptr_t offset = addr - heap_begin_;
const size_t index = OffsetToIndex(offset);
const word mask = OffsetToMask(offset);
DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
+ word* address = &bitmap_begin_[index];
+ word old_word = *address;
if (do_set) {
- bitmap_begin_[index] |= mask;
+ *address = old_word | mask;
} else {
- bitmap_begin_[index] &= ~mask;
+ *address = old_word & ~mask;
}
+ return (old_word & mask) != 0;
}
// Backing storage for bitmap.