RosAlloc thread-local allocation path without a CAS.
Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees 4101 -> 2929 (~26% reduction)
Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h
index ae8e892..bc329e1 100644
--- a/runtime/gc/space/valgrind_malloc_space-inl.h
+++ b/runtime/gc/space/valgrind_malloc_space-inl.h
@@ -32,10 +32,15 @@
template <size_t kValgrindRedZoneBytes, bool kUseObjSizeForUsable>
inline mirror::Object* AdjustForValgrind(void* obj_with_rdz, size_t num_bytes,
size_t bytes_allocated, size_t usable_size,
- size_t* bytes_allocated_out, size_t* usable_size_out) {
+ size_t bytes_tl_bulk_allocated,
+ size_t* bytes_allocated_out, size_t* usable_size_out,
+ size_t* bytes_tl_bulk_allocated_out) {
if (bytes_allocated_out != nullptr) {
*bytes_allocated_out = bytes_allocated;
}
+ if (bytes_tl_bulk_allocated_out != nullptr) {
+ *bytes_tl_bulk_allocated_out = bytes_tl_bulk_allocated;
+ }
// This cuts over-provision and is a trade-off between testing the over-provisioning code paths
// vs checking overflows in the regular paths.
@@ -82,20 +87,25 @@
kValgrindRedZoneBytes,
kAdjustForRedzoneInAllocSize,
kUseObjSizeForUsable>::AllocWithGrowth(
- Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) {
+ Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out,
+ size_t* bytes_tl_bulk_allocated_out) {
size_t bytes_allocated;
size_t usable_size;
+ size_t bytes_tl_bulk_allocated;
void* obj_with_rdz = S::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
- &bytes_allocated, &usable_size);
+ &bytes_allocated, &usable_size,
+ &bytes_tl_bulk_allocated);
if (obj_with_rdz == nullptr) {
return nullptr;
}
- return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes,
- kUseObjSizeForUsable>(obj_with_rdz, num_bytes,
- bytes_allocated, usable_size,
- bytes_allocated_out,
- usable_size_out);
+ return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>(
+ obj_with_rdz, num_bytes,
+ bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated,
+ bytes_allocated_out,
+ usable_size_out,
+ bytes_tl_bulk_allocated_out);
}
template <typename S,
@@ -106,11 +116,13 @@
kValgrindRedZoneBytes,
kAdjustForRedzoneInAllocSize,
kUseObjSizeForUsable>::Alloc(
- Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) {
+ Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out,
+ size_t* bytes_tl_bulk_allocated_out) {
size_t bytes_allocated;
size_t usable_size;
+ size_t bytes_tl_bulk_allocated;
void* obj_with_rdz = S::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
- &bytes_allocated, &usable_size);
+ &bytes_allocated, &usable_size, &bytes_tl_bulk_allocated);
if (obj_with_rdz == nullptr) {
return nullptr;
}
@@ -118,8 +130,10 @@
return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes,
kUseObjSizeForUsable>(obj_with_rdz, num_bytes,
bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated,
bytes_allocated_out,
- usable_size_out);
+ usable_size_out,
+ bytes_tl_bulk_allocated_out);
}
template <typename S,
@@ -130,20 +144,25 @@
kValgrindRedZoneBytes,
kAdjustForRedzoneInAllocSize,
kUseObjSizeForUsable>::AllocThreadUnsafe(
- Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) {
+ Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out,
+ size_t* bytes_tl_bulk_allocated_out) {
size_t bytes_allocated;
size_t usable_size;
+ size_t bytes_tl_bulk_allocated;
void* obj_with_rdz = S::AllocThreadUnsafe(self, num_bytes + 2 * kValgrindRedZoneBytes,
- &bytes_allocated, &usable_size);
+ &bytes_allocated, &usable_size,
+ &bytes_tl_bulk_allocated);
if (obj_with_rdz == nullptr) {
return nullptr;
}
- return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes,
- kUseObjSizeForUsable>(obj_with_rdz, num_bytes,
- bytes_allocated, usable_size,
- bytes_allocated_out,
- usable_size_out);
+ return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>(
+ obj_with_rdz, num_bytes,
+ bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated,
+ bytes_allocated_out,
+ usable_size_out,
+ bytes_tl_bulk_allocated_out);
}
template <typename S,
@@ -226,6 +245,17 @@
mem_map->Size() - initial_size);
}
+template <typename S,
+ size_t kValgrindRedZoneBytes,
+ bool kAdjustForRedzoneInAllocSize,
+ bool kUseObjSizeForUsable>
+size_t ValgrindMallocSpace<S,
+ kValgrindRedZoneBytes,
+ kAdjustForRedzoneInAllocSize,
+ kUseObjSizeForUsable>::MaxBytesBulkAllocatedFor(size_t num_bytes) {
+ return S::MaxBytesBulkAllocatedFor(num_bytes + 2 * kValgrindRedZoneBytes);
+}
+
} // namespace space
} // namespace gc
} // namespace art