/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <set>
#include <stdint.h>
#include <stdlib.h>
#include <string>
#include <sys/mman.h>
#include <vector>

#include "base/mutex.h"
#include "base/logging.h"
#include "globals.h"
#include "utils.h"

// A boilerplate to use hash_map/hash_set both on host and device.
#ifdef HAVE_ANDROID_OS
#include <hash_map>
#include <hash_set>
using std::hash_map;
using std::hash_set;
#else  // HAVE_ANDROID_OS
#ifdef __DEPRECATED
#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
#undef __DEPRECATED
#endif
#include <ext/hash_map>
#include <ext/hash_set>
#ifdef ROSALLOC_OLD__DEPRECATED
#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
#undef ROSALLOC_OLD__DEPRECATED
#endif
using __gnu_cxx::hash_map;
using __gnu_cxx::hash_set;
#endif  // HAVE_ANDROID_OS

namespace art {
namespace gc {
namespace allocator {

// A Runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    byte magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      if (kIsDebugBuild) {
        return magic_num_ == kMagicNumFree;
      }
      return true;
    }
    size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      const byte* fpr_base = reinterpret_cast<const byte*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      byte* fpr_base = reinterpret_cast<byte*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      byte* fpr_base = reinterpret_cast<byte*>(this);
      byte* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      if (kIsDebugBuild) {
        // Exclude the first page that stores the magic number.
        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
        byte_size -= kPageSize;
        if (byte_size > 0) {
          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
        }
      } else {
        madvise(this, byte_size, MADV_DONTNEED);
      }
    }
  };
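
  // Illustrative sketch (not part of the allocator): a FreePageRun header
  // lives in place at the first page of a free run, so a page address can
  // be reinterpreted as a header, with the run's size kept off to the side
  // in free_page_run_size_map_ so the backing pages stay releasable:
  //
  //   FreePageRun* fpr = reinterpret_cast<FreePageRun*>(page_addr);
  //   size_t size = fpr->ByteSize(rosalloc);  // reads free_page_run_size_map_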

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // | top_slot_idx      |
  // +-------------------+
  // |                   |
  // | alloc bit map     |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free bit map |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | bit map           |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    byte magic_num_;  // The magic number used for debugging.
    byte size_bracket_idx_;  // The index of the size bracket of this run.
    byte is_thread_local_;  // True if this run is used as a thread-local run.
    byte to_be_bulk_freed_;  // Used within BulkFree() to flag a run that's involved with a bulk free.
    uint32_t top_slot_idx_;  // The top slot index when this run is in bump index mode.
    uint32_t alloc_bit_map_[0];  // The bit map that indicates whether each slot is in use.

    // bulk_free_bit_map_[] : The bit map that is used for GC to
    // temporarily mark the slots to free without using a lock. After
    // all the slots to be freed in a run are marked, all those slots
    // get freed in bulk with one locking per run, as opposed to one
    // locking per slot, to minimize the lock contention. This is used
    // within BulkFree().

    // thread_local_free_bit_map_[] : The bit map that is used for GC
    // to temporarily mark the slots to free in a thread-local run
    // without using a lock (without synchronizing the thread that
    // owns the thread-local run.) When the thread-local run becomes
    // full, the thread will check this bit map and update the
    // allocation bit map of the run (that is, the slots get freed.)
174
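    // A sketch of the two-step bulk free protocol described above
    // (illustrative pseudocode, not the actual implementation;
    // dead_objects_in_run is a stand-in for the GC's per-run list):
    //
    //   // Step 1: GC marks each dead slot without holding the run's lock.
    //   for (void* ptr : dead_objects_in_run)
    //     run->MarkBulkFreeBitMap(ptr);
    //   // Step 2: one lock acquisition per run merges all marks at once.
    //   run->MergeBulkFreeBitMapIntoAllocBitMap();
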
    // Returns the byte size of the header except for the bit maps.
    static size_t fixed_header_size() {
      Run temp;
      size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
      DCHECK_EQ(size, static_cast<size_t>(8));
      return size;
    }
    // Returns the base address of the bulk free bit map.
    uint32_t* bulk_free_bit_map() {
      return reinterpret_cast<uint32_t*>(
          reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
    }
    // Returns the base address of the thread-local free bit map.
    uint32_t* thread_local_free_bit_map() {
      return reinterpret_cast<uint32_t*>(
          reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
    }
    void* End() {
      return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
    }
    // Frees slots in the allocation bit map with regard to the
    // thread-local free bit map. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
    // Frees slots in the allocation bit map with regard to the bulk
    // free bit map. Used in a bulk free.
    void MergeBulkFreeBitMapIntoAllocBitMap();
    // Unions the slots to be freed in the bulk free bit map into the
    // thread-local free bit map. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in
    // the bulk free bit map, where it can write without a lock, and
    // later acquire a lock once per run to union the bits of the bulk
    // free bit map into the thread-local free bit map.
    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
    // Allocates a slot in a run.
    void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Marks the slots to free in the bulk free bit map.
    void MarkBulkFreeBitMap(void* ptr);
    // Marks the slots to free in the thread-local free bit map.
    void MarkThreadLocalFreeBitMap(void* ptr);
    // Returns true if none of the slots in the run are in use.
    bool IsAllFree();
    // Returns true if all the slots in the run are in use.
    bool IsFull();
    // Clears all the bit maps.
    void ClearBitMaps();
    // Iterates over all the slots and applies the given function.
    void InspectAllSlots(
        void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
        void* arg);
    // Dumps the run metadata for debugging.
    void Dump();

   private:
    // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
    void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
  };

  // The magic number for a run.
  static const byte kMagicNum = 42;
  // The magic number for free pages.
  static const byte kMagicNumFree = 43;
  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
  static const size_t kNumOfSizeBrackets = 34;
  // The number of smaller size brackets that are 16 bytes apart.
  static const size_t kNumOfQuantumSizeBrackets = 32;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];
  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
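
  // For illustration, the bracket sizes that Initialize() derives from the
  // constants above (the 32 quantum brackets are 16 bytes apart, followed
  // by the 1 KB and 2 KB brackets):
  //
  //   idx:          0   1   2  ...  31    32    33
  //   bracket size: 16  32  48 ... 512  1024  2048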

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the byte size of the bracket size from the index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK(idx < kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else {
      DCHECK(size < 1 * KB);
      DCHECK_EQ(size % 16, static_cast<size_t>(0));
      idx = size / 16 - 1;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= 512)) {
      return RoundUp(size, 16);
    } else if (512 < size && size <= 1 * KB) {
      return 1 * KB;
    } else {
      DCHECK(1 * KB < size && size <= 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= 512)) {
      return RoundUp(size, 16) / 16 - 1;
    } else if (512 < size && size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK(1 * KB < size && size <= 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= 512)) {
      size_t bracket_size = RoundUp(size, 16);
      *bracket_size_out = bracket_size;
      size_t idx = bracket_size / 16 - 1;
      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
      return idx;
    } else if (512 < size && size <= 1 * KB) {
      size_t bracket_size = 1024;
      *bracket_size_out = bracket_size;
      size_t idx = kNumOfSizeBrackets - 2;
      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
      return idx;
    } else {
      DCHECK(1 * KB < size && size <= 2 * KB);
      size_t bracket_size = 2048;
      *bracket_size_out = bracket_size;
      size_t idx = kNumOfSizeBrackets - 1;
      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
      return idx;
    }
  }
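  // Worked example (illustrative): a 129-byte request rounds up to the
  // 144-byte bracket (the next multiple of 16), whose index is
  // 144 / 16 - 1 == 8:
  //
  //   size_t bracket_size;
  //   size_t idx = SizeToIndexAndBracketSize(129, &bracket_size);
  //   // idx == 8 and bracket_size == 144; RoundToBracketSize(129) == 144
  //   // and SizeToIndex(129) == 8 agree.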
  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
    return byte_offset / kPageSize;
  }
  // Returns the page map index from an address, rounding down.
  size_t RoundDownToPageMapIndex(void* addr) {
    DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
  }

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at page granularity.
  static const size_t kLargeSizeThreshold = 2048;

  // We use thread-local runs for the size brackets whose indexes
  // are less than or equal to this index. We use shared (current)
  // runs for the rest.
  static const size_t kMaxThreadLocalSizeBracketIdx = 10;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

  // The base address of the memory region that's managed by this allocator.
  byte* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's managed by this allocator.
  size_t capacity_;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
  // The free page run whose end address is the end of the memory
  // region that's managed by this allocator, if any.
  FreePageRun* last_free_page_run_;
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum {
    kPageMapEmpty = 0,            // Not allocated.
    kPageMapRun = 1,              // The beginning of a run.
    kPageMapRunPart = 2,          // The non-beginning part of a run.
    kPageMapLargeObject = 3,      // The beginning of a large object.
    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
  };
  // The table that indicates what pages are currently used for.
  std::vector<byte> page_map_ GUARDED_BY(lock_);
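  // For example (illustrative), a three-page large object starting at page
  // index 5, followed by a run starting at page index 8, would appear as:
  //
  //   page_map_[5] == kPageMapLargeObject
  //   page_map_[6] == kPageMapLargeObjectPart
  //   page_map_[7] == kPageMapLargeObjectPart
  //   page_map_[8] == kPageMapRun
  //   // ...kPageMapRunPart entries for the rest of the run's pages.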
  // The table that indicates the sizes of free page runs. The sizes are
  // stored here, rather than in the free page run headers, so that the
  // backing pages can be released.
  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The base address of the memory region that's managed by this allocator.
  byte* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  byte* End() { return base_ + capacity_; }

  // Page-granularity alloc/free.
  void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
      EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Allocate/free a run slot.
  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
      LOCKS_EXCLUDED(lock_);
  void FreeFromRun(Thread* self, void* ptr, Run* run)
      LOCKS_EXCLUDED(lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);

  // The internal implementation of a non-bulk Free().
  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);

 public:
  RosAlloc(void* base, size_t capacity);
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
      LOCKS_EXCLUDED(lock_);
  void Free(Thread* self, void* ptr)
      LOCKS_EXCLUDED(bulk_free_lock_);
  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      LOCKS_EXCLUDED(bulk_free_lock_);
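  // A minimal usage sketch (illustrative only; in ART the heap drives this
  // allocator, and `self` is the calling Thread):
  //
  //   RosAlloc rosalloc(base, capacity);
  //   size_t bytes_allocated;
  //   void* obj = rosalloc.Alloc(self, 40, &bytes_allocated);  // 48-byte slot
  //   rosalloc.Free(self, obj);
  //   // The GC sweep path frees many pointers with one call instead:
  //   rosalloc.BulkFree(self, ptrs, num_ptrs);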
  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(void* ptr);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, kPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
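  // For example (assuming 4 KB pages), UsableSize(129) returns the 144-byte
  // bracket size, while UsableSize(3000) returns 4096, since 3000 exceeds
  // kLargeSizeThreshold and is therefore allocated at page granularity.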
  // Tries to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim();
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      LOCKS_EXCLUDED(lock_);
  // Returns the current footprint.
  size_t Footprint() LOCKS_EXCLUDED(lock_);
  // Returns the current capacity, maximum footprint.
  size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  void RevokeThreadLocalRuns(Thread* thread);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
  // Dumps the page map for debugging.
  void DumpPageMap(Thread* self);

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
};

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_