X86_64: Add allocation entrypoint switching for CC is_marking

Only X86_64 is done so far. Use the normal TLAB allocators when the
GC is not marking.

Allocation speed improves by ~8% based on perf sampling.

Without the change:
  1.19%: art_quick_alloc_object_region_tlab

With the change:
  0.63%: art_quick_alloc_object_tlab
  0.47%: art_quick_alloc_object_region_tlab

Bug: 31018974
Bug: 12687968
Test: test-art-host-run-test
Change-Id: I4c4d9eb229d4ad2f41b856ba5c2958a5eb3b7ffa
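
For context, here is a minimal, self-contained sketch of the switching
idea. The names (EntryPoints, AllocObjectTlab, SelectAllocEntrypoints)
are illustrative stand-ins, not ART's real entrypoint tables or symbols:

  #include <atomic>
  #include <cstdio>

  struct EntryPoints {
    const char* (*alloc_object)();
  };

  const char* AllocObjectTlab() { return "plain TLAB fast path"; }
  const char* AllocObjectRegionTlab() { return "read-barrier region TLAB path"; }

  std::atomic<bool> g_is_marking{false};

  // Applied to each thread's entrypoint table whenever the collector
  // toggles marking (ART would do this at the start and end of marking,
  // e.g. via a checkpoint on all threads): use the cheap TLAB path while
  // the CC collector is idle and the marking-aware path otherwise.
  void SelectAllocEntrypoints(EntryPoints* eps) {
    eps->alloc_object = g_is_marking.load(std::memory_order_acquire)
                            ? AllocObjectRegionTlab
                            : AllocObjectTlab;
  }

  int main() {
    EntryPoints eps;
    SelectAllocEntrypoints(&eps);
    std::printf("%s\n", eps.alloc_object());  // plain TLAB fast path
    g_is_marking.store(true, std::memory_order_release);
    SelectAllocEntrypoints(&eps);
    std::printf("%s\n", eps.alloc_object());  // read-barrier region TLAB path
  }
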
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 97129e8..54f2210 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -247,7 +247,7 @@
if (allocator_type != kAllocatorTypeTLAB &&
allocator_type != kAllocatorTypeRegionTLAB &&
allocator_type != kAllocatorTypeRosAlloc &&
- UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+ UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
return nullptr;
}
mirror::Object* ret;
@@ -267,8 +267,9 @@
if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
// If running on valgrind or asan, we should be using the instrumented path.
size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
- if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
- max_bytes_tl_bulk_allocated))) {
+ if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
+ max_bytes_tl_bulk_allocated,
+ kGrow))) {
return nullptr;
}
ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
@@ -277,14 +278,18 @@
DCHECK(!is_running_on_memory_tool_);
size_t max_bytes_tl_bulk_allocated =
rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
- if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
- max_bytes_tl_bulk_allocated))) {
+ if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
+ max_bytes_tl_bulk_allocated,
+ kGrow))) {
return nullptr;
}
if (!kInstrumented) {
DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
}
- ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
+ ret = rosalloc_space_->AllocNonvirtual(self,
+ alloc_size,
+ bytes_allocated,
+ usable_size,
bytes_tl_bulk_allocated);
}
break;
@@ -292,22 +297,34 @@
case kAllocatorTypeDlMalloc: {
if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
// If running on valgrind, we should be using the instrumented path.
- ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+ ret = dlmalloc_space_->Alloc(self,
+ alloc_size,
+ bytes_allocated,
+ usable_size,
bytes_tl_bulk_allocated);
} else {
DCHECK(!is_running_on_memory_tool_);
- ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
+ ret = dlmalloc_space_->AllocNonvirtual(self,
+ alloc_size,
+ bytes_allocated,
+ usable_size,
bytes_tl_bulk_allocated);
}
break;
}
case kAllocatorTypeNonMoving: {
- ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+ ret = non_moving_space_->Alloc(self,
+ alloc_size,
+ bytes_allocated,
+ usable_size,
bytes_tl_bulk_allocated);
break;
}
case kAllocatorTypeLOS: {
- ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+ ret = large_object_space_->Alloc(self,
+ alloc_size,
+ bytes_allocated,
+ usable_size,
bytes_tl_bulk_allocated);
// Note that the bump pointer spaces aren't necessarily next to
// the other continuous spaces like the non-moving alloc space or
@@ -315,80 +332,38 @@
DCHECK(ret == nullptr || large_object_space_->Contains(ret));
break;
}
- case kAllocatorTypeTLAB: {
- DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment);
- if (UNLIKELY(self->TlabSize() < alloc_size)) {
- const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
- if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) {
- return nullptr;
- }
- // Try allocating a new thread local buffer, if the allocaiton fails the space must be
- // full so return null.
- if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
- return nullptr;
- }
- *bytes_tl_bulk_allocated = new_tlab_size;
- } else {
- *bytes_tl_bulk_allocated = 0;
- }
- // The allocation can't fail.
- ret = self->AllocTlab(alloc_size);
- DCHECK(ret != nullptr);
- *bytes_allocated = alloc_size;
- *usable_size = alloc_size;
- break;
- }
case kAllocatorTypeRegion: {
DCHECK(region_space_ != nullptr);
alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
- ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
bytes_tl_bulk_allocated);
break;
}
+ case kAllocatorTypeTLAB:
+ FALLTHROUGH_INTENDED;
case kAllocatorTypeRegionTLAB: {
- DCHECK(region_space_ != nullptr);
- DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
+ DCHECK_ALIGNED(alloc_size, kObjectAlignment);
+ static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
+ "mismatched alignments");
+ static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
+ "mismatched alignments");
if (UNLIKELY(self->TlabSize() < alloc_size)) {
- if (space::RegionSpace::kRegionSize >= alloc_size) {
- // Non-large. Check OOME for a tlab.
- if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) {
- // Try to allocate a tlab.
- if (!region_space_->AllocNewTlab(self)) {
- // Failed to allocate a tlab. Try non-tlab.
- ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
- bytes_tl_bulk_allocated);
- return ret;
- }
- *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
- // Fall-through.
- } else {
- // Check OOME for a non-tlab allocation.
- if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
- ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
- bytes_tl_bulk_allocated);
- return ret;
- } else {
- // Neither tlab or non-tlab works. Give up.
- return nullptr;
- }
- }
- } else {
- // Large. Check OOME.
- if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
- ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
- bytes_tl_bulk_allocated);
- return ret;
- } else {
- return nullptr;
- }
- }
- } else {
- *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer.
+ // kAllocatorTypeTLAB may stand in for the region space TLAB allocator when the GC is not
+ // marking; that is why the allocator type is not passed down to AllocWithNewTLAB.
+ return AllocWithNewTLAB(self,
+ alloc_size,
+ kGrow,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
}
// The allocation can't fail.
ret = self->AllocTlab(alloc_size);
DCHECK(ret != nullptr);
*bytes_allocated = alloc_size;
+ *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer.
*usable_size = alloc_size;
break;
}
@@ -408,15 +383,16 @@
return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
}
-template <bool kGrow>
-inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
+inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+ size_t alloc_size,
+ bool grow) {
size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
if (UNLIKELY(new_footprint > growth_limit_)) {
return true;
}
if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) {
- if (!kGrow) {
+ if (!grow) {
return true;
}
// TODO: Grow for allocation is racy, fix it.
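
(Aside on the heap-inl.h change above: the FALLTHROUGH_INTENDED from
kAllocatorTypeTLAB into kAllocatorTypeRegionTLAB is only sound because the
bump pointer space, the region space, and plain objects all share one
alignment, which the new static_asserts pin down at compile time. A minimal
sketch of the pattern, with illustrative constants rather than ART's space
definitions:

  #include <cstddef>

  constexpr std::size_t kObjectAlignment = 8;
  constexpr std::size_t kBumpPointerAlignment = 8;
  constexpr std::size_t kRegionAlignment = 8;

  enum AllocatorType { kTLAB, kRegionTLAB };

  std::size_t SharedTlabAlloc(std::size_t alloc_size) {
    return alloc_size;  // stands in for the shared TLAB fast path
  }

  std::size_t Dispatch(AllocatorType type, std::size_t alloc_size) {
    switch (type) {
      case kTLAB:
        [[fallthrough]];  // ART spells this FALLTHROUGH_INTENDED
      case kRegionTLAB: {
        // Sharing one TLAB path is valid only if every space involved
        // agrees on alignment; a mismatch becomes a compile error.
        static_assert(kRegionAlignment == kBumpPointerAlignment,
                      "mismatched alignments");
        static_assert(kObjectAlignment == kBumpPointerAlignment,
                      "mismatched alignments");
        return SharedTlabAlloc(alloc_size);
      }
    }
    return 0;
  }
)
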
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ddc3852..3e1cbeb 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1815,7 +1815,7 @@
break;
}
// Try to transition the heap if the allocation failure was due to the space being full.
- if (!IsOutOfMemoryOnAllocation<false>(allocator, alloc_size)) {
+ if (!IsOutOfMemoryOnAllocation(allocator, alloc_size, /*grow*/ false)) {
// If we aren't out of memory then the OOM was probably from the non moving space being
// full. Attempt to disable compaction and turn the main space into a non moving space.
DisableMovingGc();
@@ -4221,5 +4221,72 @@
gc_pause_listener_.StoreRelaxed(nullptr);
}
+mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
+ size_t alloc_size,
+ bool grow,
+ size_t* bytes_allocated,
+ size_t* usable_size,
+ size_t* bytes_tl_bulk_allocated) {
+ const AllocatorType allocator_type = GetCurrentAllocator();
+ if (allocator_type == kAllocatorTypeTLAB) {
+ DCHECK(bump_pointer_space_ != nullptr);
+ const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+ if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
+ return nullptr;
+ }
+ // Try allocating a new thread local buffer; if the allocation fails, the space must be
+ // full, so return null.
+ if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
+ return nullptr;
+ }
+ *bytes_tl_bulk_allocated = new_tlab_size;
+ } else {
+ DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
+ DCHECK(region_space_ != nullptr);
+ if (space::RegionSpace::kRegionSize >= alloc_size) {
+ // Non-large. Check OOME for a tlab.
+ if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
+ space::RegionSpace::kRegionSize,
+ grow))) {
+ // Try to allocate a tlab.
+ if (!region_space_->AllocNewTlab(self)) {
+ // Failed to allocate a tlab. Try non-tlab.
+ return region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ }
+ *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+ // Fall-through to using the TLAB below.
+ } else {
+ // Check OOME for a non-tlab allocation.
+ if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
+ return region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ }
+ // Neither tlab nor non-tlab works. Give up.
+ return nullptr;
+ }
+ } else {
+ // Large. Check OOME.
+ if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
+ return region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ }
+ return nullptr;
+ }
+ }
+ // Refilled the TLAB; the allocation below cannot fail.
+ mirror::Object* ret = self->AllocTlab(alloc_size);
+ DCHECK(ret != nullptr);
+ *bytes_allocated = alloc_size;
+ *usable_size = alloc_size;
+ return ret;
+}
+
} // namespace gc
} // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0c671d2..3a8e29b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -854,6 +854,10 @@
allocator_type != kAllocatorTypeRegionTLAB;
}
static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
+ if (kUseReadBarrier) {
+ // The read barrier config may use the TLAB allocator but its GC is always concurrent.
+ // TODO: clean this up.
+ return true;
+ }
return
allocator_type != kAllocatorTypeBumpPointer &&
allocator_type != kAllocatorTypeTLAB;
@@ -923,11 +927,20 @@
size_t* bytes_tl_bulk_allocated)
REQUIRES_SHARED(Locks::mutator_lock_);
+ mirror::Object* AllocWithNewTLAB(Thread* self,
+ size_t alloc_size,
+ bool grow,
+ size_t* bytes_allocated,
+ size_t* usable_size,
+ size_t* bytes_tl_bulk_allocated)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
REQUIRES_SHARED(Locks::mutator_lock_);
- template <bool kGrow>
- ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+ ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+ size_t alloc_size,
+ bool grow);
// Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
void RunFinalization(JNIEnv* env, uint64_t timeout);
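
A note on the IsOutOfMemoryOnAllocation change in heap.h: kGrow moves from
a template parameter to a runtime bool because the new TLAB refill path,
AllocWithNewTLAB, is an out-of-line slow path in heap.cc rather than an
inlined template. A minimal sketch of that transformation, with illustrative
names and numbers rather than ART's real footprint accounting:

  #include <cstddef>

  constexpr std::size_t kGrowthLimit = 1024;
  std::size_t g_footprint = 1000;

  // Before: grow was a compile-time template parameter, so the branch
  // folded away, but every caller had to see the definition inline.
  template <bool kGrow>
  bool IsOutOfMemoryTemplated(std::size_t alloc_size) {
    if (g_footprint + alloc_size > kGrowthLimit) {
      return !kGrow;
    }
    return false;
  }

  // After: grow is an ordinary argument, so the check can also be called
  // from an out-of-line slow path at the cost of one runtime branch.
  bool IsOutOfMemoryRuntime(std::size_t alloc_size, bool grow) {
    if (g_footprint + alloc_size > kGrowthLimit) {
      return !grow;
    }
    return false;
  }

  int main() {
    bool a = IsOutOfMemoryTemplated<false>(100);        // over budget, no grow
    bool b = IsOutOfMemoryRuntime(100, /*grow=*/ true); // allowed to grow
    return (a && !b) ? 0 : 1;
  }
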