/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <set>
#include <stdint.h>
#include <stdlib.h>
#include <string>
#include <sys/mman.h>
#include <vector>

#include "base/mutex.h"
#include "base/logging.h"
#include "globals.h"
#include "utils.h"

// Boilerplate to use hash_map/hash_set on both host and device.
#ifdef HAVE_ANDROID_OS
#include <hash_map>
#include <hash_set>
using std::hash_map;
using std::hash_set;
#else  // HAVE_ANDROID_OS
#ifdef __DEPRECATED
#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
#undef __DEPRECATED
#endif
#include <ext/hash_map>
#include <ext/hash_set>
#ifdef ROSALLOC_OLD__DEPRECATED
#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
#undef ROSALLOC_OLD__DEPRECATED
#endif
using __gnu_cxx::hash_map;
using __gnu_cxx::hash_set;
#endif  // HAVE_ANDROID_OS

namespace art {
namespace gc {
namespace allocator {

// A Runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    byte magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      if (kIsDebugBuild) {
        return magic_num_ == kMagicNumFree;
      }
      return true;
    }
    size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      const byte* fpr_base = reinterpret_cast<const byte*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      byte* fpr_base = reinterpret_cast<byte*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      byte* fpr_base = reinterpret_cast<byte*>(this);
      byte* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      if (kIsDebugBuild) {
        // Exclude the first page that stores the magic number.
        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
        byte_size -= kPageSize;
        if (byte_size > 0) {
          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
        }
      } else {
        madvise(this, byte_size, MADV_DONTNEED);
      }
    }
  };

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // | top_slot_idx      |
  // +-------------------+
  // |                   |
  // | alloc bit map     |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free bit map |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | bit map           |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
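  // For illustration (approximate; the exact per-bracket values are computed
  // in Initialize()): with 4 KB pages, a one-page run for the 16-byte bracket
  // has the 8-byte fixed header plus three bit maps of ceil(numOfSlots / 32)
  // * 4 bytes each; with alignment padding this leaves room for roughly 249
  // 16-byte slots.
  //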
  class Run {
   public:
    byte magic_num_;             // The magic number used for debugging.
    byte size_bracket_idx_;      // The index of the size bracket of this run.
    byte is_thread_local_;       // True if this run is used as a thread-local run.
    byte to_be_bulk_freed_;      // Used within BulkFree() to flag a run that's involved with a bulk free.
    uint32_t top_slot_idx_;      // The top slot index when this run is in bump index mode.
    uint32_t alloc_bit_map_[0];  // The bit map that indicates which slots are in use.

    // bulk_free_bit_map_[] : The bit map that is used for GC to
    // temporarily mark the slots to free without using a lock. After
    // all the slots to be freed in a run are marked, all those slots
    // get freed in bulk with one locking per run, as opposed to one
    // locking per slot, to minimize the lock contention. This is used
    // within BulkFree().

    // thread_local_free_bit_map_[] : The bit map that is used for GC
    // to temporarily mark the slots to free in a thread-local run
    // without using a lock (that is, without synchronizing with the
    // thread that owns the thread-local run). When the thread-local
    // run becomes full, the thread will check this bit map and update
    // the allocation bit map of the run (that is, the slots get freed).
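
    // Illustrative flow of BulkFree() over these bit maps (a sketch of the
    // protocol, not the exact implementation):
    //   1. For each pointer, mark its slot in the bulk free bit map via
    //      MarkBulkFreeBitMap(ptr); no per-run lock is needed for this step.
    //   2. For each distinct run seen in step 1, acquire its bracket lock
    //      once, then call UnionBulkFreeBitMapToThreadLocalFreeBitMap() if
    //      the run is thread-local, or MergeBulkFreeBitMapIntoAllocBitMap()
    //      otherwise.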

    // Returns the byte size of the header except for the bit maps.
    static size_t fixed_header_size() {
      Run temp;
      size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
      DCHECK_EQ(size, static_cast<size_t>(8));
      return size;
    }
    // Returns the base address of the bulk free bit map.
    uint32_t* bulk_free_bit_map() {
      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
    }
    // Returns the base address of the thread-local free bit map.
    uint32_t* thread_local_free_bit_map() {
      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
    }
    void* End() {
      return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
    }
    // Frees slots in the allocation bit map according to the
    // thread-local free bit map. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
    // Frees slots in the allocation bit map according to the bulk
    // free bit map. Used in a bulk free.
    void MergeBulkFreeBitMapIntoAllocBitMap();
    // Unions the slots to be freed in the bulk free bit map into the
    // thread-local free bit map. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in
    // the bulk free bit map, where it can write without a lock, and
    // later acquire a lock once per run to union the bits of the bulk
    // free bit map into the thread-local free bit map.
    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
    // Allocates a slot in a run.
    void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Marks the slots to free in the bulk free bit map.
    void MarkBulkFreeBitMap(void* ptr);
    // Marks the slots to free in the thread-local free bit map.
    void MarkThreadLocalFreeBitMap(void* ptr);
    // Returns true if none of the slots in the run are in use.
    bool IsAllFree();
    // Returns true if all the slots in the run are in use.
    bool IsFull();
    // Clears all the bit maps.
    void ClearBitMaps();
    // Iterates over all the slots and applies the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
    // Dumps the run metadata for debugging.
    void Dump();

   private:
    // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
    void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
  };

  // The magic number for a run.
  static const byte kMagicNum = 42;
  // The magic number for free pages.
  static const byte kMagicNumFree = 43;
  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
  static const size_t kNumOfSizeBrackets = 34;
  // The number of smaller size brackets that are 16 bytes apart.
  static const size_t kNumOfQuantumSizeBrackets = 32;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];
  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the bracket size (in bytes) for the given bracket index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK(idx < kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else {
      DCHECK(size < 1 * KB);
      DCHECK_EQ(size % 16, static_cast<size_t>(0));
      idx = size / 16 - 1;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
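  // For illustration (values follow from the bracket layout above):
  //   BracketSizeToIndex(16) == 0, BracketSizeToIndex(48) == 2,
  //   BracketSizeToIndex(1 * KB) == 32, BracketSizeToIndex(2 * KB) == 33.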
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= 512)) {
      return RoundUp(size, 16);
    } else if (512 < size && size <= 1 * KB) {
      return 1 * KB;
    } else {
      DCHECK(1 * KB < size && size <= 2 * KB);
      return 2 * KB;
    }
  }
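  // For example: RoundToBracketSize(33) == 48, RoundToBracketSize(600) == 1 * KB,
  // and RoundToBracketSize(1500) == 2 * KB.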
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= 512)) {
      return RoundUp(size, 16) / 16 - 1;
    } else if (512 < size && size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK(1 * KB < size && size <= 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
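  // Note (an invariant of the mappings above): for any size with
  // 0 < size <= kLargeSizeThreshold,
  //   SizeToIndex(size) == BracketSizeToIndex(RoundToBracketSize(size)).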
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= 512)) {
      size_t bracket_size = RoundUp(size, 16);
      *bracket_size_out = bracket_size;
      size_t idx = bracket_size / 16 - 1;
      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
      return idx;
    } else if (512 < size && size <= 1 * KB) {
      size_t bracket_size = 1024;
      *bracket_size_out = bracket_size;
      size_t idx = kNumOfSizeBrackets - 2;
      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
      return idx;
    } else {
      DCHECK(1 * KB < size && size <= 2 * KB);
      size_t bracket_size = 2048;
      *bracket_size_out = bracket_size;
      size_t idx = kNumOfSizeBrackets - 1;
      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
      return idx;
    }
  }
  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
    return byte_offset / kPageSize;
  }
  // Returns the page map index from an address, rounding down to the
  // nearest page boundary if the address is not page size aligned.
  size_t RoundDownToPageMapIndex(void* addr) {
    DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
  }
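  // For example, assuming 4 KB pages, an address equal to
  // base_ + 2 * kPageSize + 100 yields RoundDownToPageMapIndex() == 2,
  // whereas ToPageMapIndex() would fail its alignment DCHECK for it.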

  // A memory allocation request larger than this size is treated as a large
  // object and allocated at page granularity.
  static const size_t kLargeSizeThreshold = 2048;

  // We use thread-local runs for the size brackets whose indexes
  // are less than or equal to this index. We use shared (current)
  // runs for the rest.
  static const size_t kMaxThreadLocalSizeBracketIdx = 10;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

  // The base address of the memory region that's managed by this allocator.
  byte* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's managed by this allocator.
  size_t capacity_;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
  // The free page run whose end address is the end of the memory
  // region that's managed by this allocator, if any.
  FreePageRun* last_free_page_run_;
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum {
    kPageMapEmpty = 0,            // Not allocated.
    kPageMapRun = 1,              // The beginning of a run.
    kPageMapRunPart = 2,          // The non-beginning part of a run.
    kPageMapLargeObject = 3,      // The beginning of a large object.
    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
  };
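  // For example, a two-page run followed immediately by a three-page large
  // object would appear in the page map as the sequence:
  //   kPageMapRun, kPageMapRunPart,
  //   kPageMapLargeObject, kPageMapLargeObjectPart, kPageMapLargeObjectPart.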
  // The table that indicates what pages are currently used for.
  std::vector<byte> page_map_ GUARDED_BY(lock_);
  // The table that indicates the size of free page runs. These sizes
  // are stored here rather than in the free page headers so that the
  // backing pages can be released.
  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The base address of the memory region that's managed by this allocator.
  byte* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  byte* End() { return base_ + capacity_; }

  // Page-granularity alloc/free.
  void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
      EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Allocate/free a run slot.
  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
      LOCKS_EXCLUDED(lock_);
  void FreeFromRun(Thread* self, void* ptr, Run* run)
      LOCKS_EXCLUDED(lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);

  // The internal implementation of the non-bulk Free().
  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);

  // Allocates large objects.
  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);

 public:
  RosAlloc(void* base, size_t capacity);
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
      LOCKS_EXCLUDED(lock_);
  void Free(Thread* self, void* ptr)
      LOCKS_EXCLUDED(bulk_free_lock_);
  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      LOCKS_EXCLUDED(bulk_free_lock_);
  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(void* ptr);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, kPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
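  // For example: UsableSize(24) == 32 (the 32-byte bracket) and, assuming
  // 4 KB pages, UsableSize(3000) == 4096, since 3000 > kLargeSizeThreshold.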
  // Tries to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim();
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      LOCKS_EXCLUDED(lock_);
  // Returns the current footprint.
  size_t Footprint() LOCKS_EXCLUDED(lock_);
  // Returns the current capacity, i.e., the maximum footprint.
  size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
  // Updates the current capacity.
  void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  void RevokeThreadLocalRuns(Thread* thread);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
  // Dumps the page map for debugging.
  void DumpPageMap(Thread* self);

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
};
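
// Illustrative usage (a sketch, not part of this header; assumes `base` is a
// page-aligned mapping of `capacity` bytes obtained by the caller, e.g. via
// mmap, and `self` is the calling Thread*):
//
//   RosAlloc rosalloc(base, capacity);
//   size_t bytes_allocated = 0;
//   void* obj = rosalloc.Alloc(self, 24, &bytes_allocated);  // 32-byte bracket.
//   CHECK_EQ(bytes_allocated, rosalloc.UsableSize(obj));
//   rosalloc.Free(self, obj);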

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_