mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 10:26:02 +00:00 
			
		
		
		
	gh-112532: Improve mimalloc page visiting (#114133)
This adds support for visiting abandoned pages in mimalloc and improves the performance of the page visiting code. Abandoned pages contain memory blocks from threads that have exited; they may later be reclaimed by other threads. We still need to visit those pages in the free-threaded GC because they contain live objects.
This also reduces the overhead of visiting mimalloc pages:
* Add special cases for full pages, empty pages, and pages containing only a single block.
* Fix free_map to use one bit instead of one byte per block.
* Use a fast integer-division-by-a-constant algorithm when computing block offset from block size and index.
This commit is contained in:
		
							parent
							
								
									e45bae7a45
								
							
						
					
					
						commit
						412920a41e
					
				
					 3 changed files with 134 additions and 34 deletions
				
			
		|  | @ -120,6 +120,8 @@ void       _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* | ||||||
| void       _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); | void       _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); | ||||||
| bool       _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); | bool       _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); | ||||||
| void       _mi_segment_thread_collect(mi_segments_tld_t* tld); | void       _mi_segment_thread_collect(mi_segments_tld_t* tld); | ||||||
|  | bool       _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| #if MI_HUGE_PAGE_ABANDON | #if MI_HUGE_PAGE_ABANDON | ||||||
| void       _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); | void       _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); | ||||||
|  | @ -161,6 +163,8 @@ void       _mi_heap_collect_abandon(mi_heap_t* heap); | ||||||
| void       _mi_heap_set_default_direct(mi_heap_t* heap); | void       _mi_heap_set_default_direct(mi_heap_t* heap); | ||||||
| bool       _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); | bool       _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); | ||||||
| void       _mi_heap_unsafe_destroy_all(void); | void       _mi_heap_unsafe_destroy_all(void); | ||||||
|  | void       _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page); | ||||||
|  | bool       _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg); | ||||||
| 
 | 
 | ||||||
| // "stats.c"
 | // "stats.c"
 | ||||||
| void       _mi_stats_done(mi_stats_t* stats); | void       _mi_stats_done(mi_stats_t* stats); | ||||||
|  |  | ||||||
|  | @ -26,7 +26,7 @@ typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa | ||||||
| // Visit all pages in a heap; returns `false` if break was called.
 | // Visit all pages in a heap; returns `false` if break was called.
 | ||||||
| static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) | static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) | ||||||
| { | { | ||||||
|   if (heap==NULL || heap->page_count==0) return 0; |   if (heap==NULL || heap->page_count==0) return true; | ||||||
| 
 | 
 | ||||||
|   // visit all pages
 |   // visit all pages
 | ||||||
|   #if MI_DEBUG>1 |   #if MI_DEBUG>1 | ||||||
|  | @ -521,11 +521,20 @@ typedef struct mi_heap_area_ex_s { | ||||||
|   mi_page_t*     page; |   mi_page_t*     page; | ||||||
| } mi_heap_area_ex_t; | } mi_heap_area_ex_t; | ||||||
| 
 | 
 | ||||||
| static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) { | static void mi_fast_divisor(size_t divisor, size_t* magic, size_t* shift) { | ||||||
|   mi_assert(xarea != NULL); |   mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX); | ||||||
|   if (xarea==NULL) return true; |   *shift = MI_INTPTR_BITS - mi_clz(divisor - 1); | ||||||
|   const mi_heap_area_t* area = &xarea->area; |   *magic = (size_t)(((1ULL << 32) * ((1ULL << *shift) - divisor)) / divisor + 1); | ||||||
|   mi_page_t* page = xarea->page; | } | ||||||
|  | 
 | ||||||
|  | static size_t mi_fast_divide(size_t n, size_t magic, size_t shift) { | ||||||
|  |   mi_assert_internal(n <= UINT32_MAX); | ||||||
|  |   return ((((uint64_t) n * magic) >> 32) + n) >> shift; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg) { | ||||||
|  |   mi_assert(area != NULL); | ||||||
|  |   if (area==NULL) return true; | ||||||
|   mi_assert(page != NULL); |   mi_assert(page != NULL); | ||||||
|   if (page == NULL) return true; |   if (page == NULL) return true; | ||||||
| 
 | 
 | ||||||
|  | @ -537,17 +546,39 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v | ||||||
|   const size_t ubsize = mi_page_usable_block_size(page); // without padding
 |   const size_t ubsize = mi_page_usable_block_size(page); // without padding
 | ||||||
|   size_t   psize; |   size_t   psize; | ||||||
|   uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); |   uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); | ||||||
|  |   mi_heap_t* heap = mi_page_heap(page); | ||||||
| 
 | 
 | ||||||
|   if (page->capacity == 1) { |   if (page->capacity == 1) { | ||||||
|     // optimize page with one block
 |     // optimize page with one block
 | ||||||
|     mi_assert_internal(page->used == 1 && page->free == NULL); |     mi_assert_internal(page->used == 1 && page->free == NULL); | ||||||
|     return visitor(mi_page_heap(page), area, pstart, ubsize, arg); |     return visitor(heap, area, pstart, ubsize, arg); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   if (page->used == page->capacity) { | ||||||
|  |     // optimize full pages
 | ||||||
|  |     uint8_t* block = pstart; | ||||||
|  |     for (size_t i = 0; i < page->capacity; i++) { | ||||||
|  |         if (!visitor(heap, area, block, ubsize, arg)) return false; | ||||||
|  |         block += bsize; | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   // create a bitmap of free blocks.
 |   // create a bitmap of free blocks.
 | ||||||
|   #define MI_MAX_BLOCKS   (MI_SMALL_PAGE_SIZE / sizeof(void*)) |   #define MI_MAX_BLOCKS   (MI_SMALL_PAGE_SIZE / sizeof(void*)) | ||||||
|   uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; |   uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS]; | ||||||
|   memset(free_map, 0, sizeof(free_map)); |   size_t bmapsize = (page->capacity + MI_INTPTR_BITS - 1) / MI_INTPTR_BITS; | ||||||
|  |   memset(free_map, 0, bmapsize * sizeof(uintptr_t)); | ||||||
|  | 
 | ||||||
|  |   if (page->capacity % MI_INTPTR_BITS != 0) { | ||||||
|  |     size_t shift = (page->capacity % MI_INTPTR_BITS); | ||||||
|  |     uintptr_t mask = (UINTPTR_MAX << shift); | ||||||
|  |     free_map[bmapsize-1] = mask; | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   // fast repeated division by the block size
 | ||||||
|  |   size_t magic, shift; | ||||||
|  |   mi_fast_divisor(bsize, &magic, &shift); | ||||||
| 
 | 
 | ||||||
|   #if MI_DEBUG>1 |   #if MI_DEBUG>1 | ||||||
|   size_t free_count = 0; |   size_t free_count = 0; | ||||||
|  | @ -559,10 +590,11 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v | ||||||
|     mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); |     mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); | ||||||
|     size_t offset = (uint8_t*)block - pstart; |     size_t offset = (uint8_t*)block - pstart; | ||||||
|     mi_assert_internal(offset % bsize == 0); |     mi_assert_internal(offset % bsize == 0); | ||||||
|     size_t blockidx = offset / bsize;  // Todo: avoid division?
 |     size_t blockidx = mi_fast_divide(offset, magic, shift); | ||||||
|  |     mi_assert_internal(blockidx == offset / bsize); | ||||||
|     mi_assert_internal(blockidx < MI_MAX_BLOCKS); |     mi_assert_internal(blockidx < MI_MAX_BLOCKS); | ||||||
|     size_t bitidx = (blockidx / sizeof(uintptr_t)); |     size_t bitidx = (blockidx / MI_INTPTR_BITS); | ||||||
|     size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); |     size_t bit = blockidx - (bitidx * MI_INTPTR_BITS); | ||||||
|     free_map[bitidx] |= ((uintptr_t)1 << bit); |     free_map[bitidx] |= ((uintptr_t)1 << bit); | ||||||
|   } |   } | ||||||
|   mi_assert_internal(page->capacity == (free_count + page->used)); |   mi_assert_internal(page->capacity == (free_count + page->used)); | ||||||
|  | @ -571,19 +603,29 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v | ||||||
|   #if MI_DEBUG>1 |   #if MI_DEBUG>1 | ||||||
|   size_t used_count = 0; |   size_t used_count = 0; | ||||||
|   #endif |   #endif | ||||||
|   for (size_t i = 0; i < page->capacity; i++) { |   uint8_t* block = pstart; | ||||||
|     size_t bitidx = (i / sizeof(uintptr_t)); |   for (size_t i = 0; i < bmapsize; i++) { | ||||||
|     size_t bit = i - (bitidx * sizeof(uintptr_t)); |     if (free_map[i] == 0) { | ||||||
|     uintptr_t m = free_map[bitidx]; |       // every block is in use
 | ||||||
|     if (bit == 0 && m == UINTPTR_MAX) { |       for (size_t j = 0; j < MI_INTPTR_BITS; j++) { | ||||||
|       i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
 |  | ||||||
|     } |  | ||||||
|     else if ((m & ((uintptr_t)1 << bit)) == 0) { |  | ||||||
|         #if MI_DEBUG>1 |         #if MI_DEBUG>1 | ||||||
|         used_count++; |         used_count++; | ||||||
|         #endif |         #endif | ||||||
|       uint8_t* block = pstart + (i * bsize); |         if (!visitor(heap, area, block, ubsize, arg)) return false; | ||||||
|       if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; |         block += bsize; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     else { | ||||||
|  |       uintptr_t m = ~free_map[i]; | ||||||
|  |       while (m) { | ||||||
|  |         #if MI_DEBUG>1 | ||||||
|  |         used_count++; | ||||||
|  |         #endif | ||||||
|  |         size_t bitidx = mi_ctz(m); | ||||||
|  |         if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false; | ||||||
|  |         m &= m - 1; | ||||||
|  |       } | ||||||
|  |       block += bsize * MI_INTPTR_BITS; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   mi_assert_internal(page->used == used_count); |   mi_assert_internal(page->used == used_count); | ||||||
|  | @ -592,21 +634,24 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v | ||||||
| 
 | 
 | ||||||
| typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); | typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); | ||||||
| 
 | 
 | ||||||
|  | void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { | ||||||
|  |   const size_t bsize = mi_page_block_size(page); | ||||||
|  |   const size_t ubsize = mi_page_usable_block_size(page); | ||||||
|  |   area->reserved = page->reserved * bsize; | ||||||
|  |   area->committed = page->capacity * bsize; | ||||||
|  |   area->blocks = _mi_page_start(_mi_page_segment(page), page, NULL); | ||||||
|  |   area->used = page->used;   // number of blocks in use (#553)
 | ||||||
|  |   area->block_size = ubsize; | ||||||
|  |   area->full_block_size = bsize; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { | static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { | ||||||
|   MI_UNUSED(heap); |   MI_UNUSED(heap); | ||||||
|   MI_UNUSED(pq); |   MI_UNUSED(pq); | ||||||
|   mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; |   mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; | ||||||
|   mi_heap_area_ex_t xarea; |   mi_heap_area_ex_t xarea; | ||||||
|   const size_t bsize = mi_page_block_size(page); |  | ||||||
|   const size_t ubsize = mi_page_usable_block_size(page); |  | ||||||
|   xarea.page = page; |   xarea.page = page; | ||||||
|   xarea.area.reserved = page->reserved * bsize; |   _mi_heap_area_init(&xarea.area, page); | ||||||
|   xarea.area.committed = page->capacity * bsize; |  | ||||||
|   xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); |  | ||||||
|   xarea.area.used = page->used;   // number of blocks in use (#553)
 |  | ||||||
|   xarea.area.block_size = ubsize; |  | ||||||
|   xarea.area.full_block_size = bsize; |  | ||||||
|   return fun(heap, &xarea, arg); |   return fun(heap, &xarea, arg); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -627,7 +672,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* | ||||||
|   mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; |   mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; | ||||||
|   if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; |   if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; | ||||||
|   if (args->visit_blocks) { |   if (args->visit_blocks) { | ||||||
|     return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg); |     return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg); | ||||||
|   } |   } | ||||||
|   else { |   else { | ||||||
|     return true; |     return true; | ||||||
|  | @ -637,5 +682,6 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* | ||||||
| // Visit all blocks in a heap
 | // Visit all blocks in a heap
 | ||||||
| bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { | bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { | ||||||
|   mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; |   mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; | ||||||
|  |   _mi_heap_delayed_free_partial((mi_heap_t *)heap); | ||||||
|   return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); |   return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1614,3 +1614,53 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag | ||||||
|   mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); |   mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); | ||||||
|   return page; |   return page; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | /* -----------------------------------------------------------
 | ||||||
|  |    Visit blocks in abandoned segments | ||||||
|  | ----------------------------------------------------------- */ | ||||||
|  | 
 | ||||||
|  | static bool mi_segment_visit_page(mi_segment_t* segment, mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) | ||||||
|  | { | ||||||
|  |   mi_heap_area_t area; | ||||||
|  |   _mi_heap_area_init(&area, page); | ||||||
|  |   if (!visitor(NULL, &area, NULL, area.block_size, arg)) return false; | ||||||
|  |   if (visit_blocks) { | ||||||
|  |     return _mi_heap_area_visit_blocks(&area, page, visitor, arg); | ||||||
|  |   } | ||||||
|  |   else { | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static bool mi_segment_visit_pages(mi_segment_t* segment, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { | ||||||
|  |   const mi_slice_t* end; | ||||||
|  |   mi_slice_t* slice = mi_slices_start_iterate(segment, &end); | ||||||
|  |   while (slice < end) { | ||||||
|  |     if (mi_slice_is_used(slice)) { | ||||||
|  |       mi_page_t* const page = mi_slice_to_page(slice); | ||||||
|  |       if (page->tag == page_tag) { | ||||||
|  |         if (!mi_segment_visit_page(segment, page, visit_blocks, visitor, arg)) return false; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     slice = slice + slice->slice_count; | ||||||
|  |   } | ||||||
|  |   return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Visit all blocks in abandoned segments
 | ||||||
|  | bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { | ||||||
|  |   // Note: this is not safe if any other thread is abandoning or claiming segments from the pool
 | ||||||
|  |   mi_segment_t* segment = mi_tagged_segment_ptr(pool->abandoned); | ||||||
|  |   while (segment != NULL) { | ||||||
|  |     if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false; | ||||||
|  |     segment = segment->abandoned_next; | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   segment = pool->abandoned_visited; | ||||||
|  |   while (segment != NULL) { | ||||||
|  |     if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false; | ||||||
|  |     segment = segment->abandoned_next; | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   return true; | ||||||
|  | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Sam Gross
						Sam Gross