//
// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// heap.c --
//
// Author           : Peter A. Buhr
// Created On       : Tue Dec 19 21:58:35 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Fri Oct 18 07:42:09 2019
// Update Count     : 556
//

#include <unistd.h>										// sbrk, sysconf
#include <stdbool.h>									// true, false
#include <stdio.h>										// snprintf, fileno
#include <errno.h>										// errno
extern "C" {
#include <sys/mman.h>									// mmap, munmap
} // extern "C"

// #comment TD : Many of these should be merged into math I believe
#include "bits/align.hfa"								// libPow2
#include "bits/defs.hfa"								// likely, unlikely
#include "bits/locks.hfa"								// __spinlock_t
#include "startup.hfa"									// STARTUP_PRIORITY_MEMORY
#include "stdlib.hfa"									// bsearchl
#include "malloc.h"


// Heap tracing switch: each toggle routine installs the new setting and returns the previous one.
static bool traceHeap = false;

inline bool traceHeap() {
	return traceHeap;
} // traceHeap

bool traceHeapOn() {
	bool temp = traceHeap;
	traceHeap = true;
	return temp;
} // traceHeapOn

bool traceHeapOff() {
	bool temp = traceHeap;
	traceHeap = false;
	return temp;
} // traceHeapOff


// Free-list checking switch (consulted by malloc_stats): same protocol as the tracing switch.
static bool checkFree = false;

inline bool checkFree() {
	return checkFree;
} // checkFree

bool checkFreeOn() {
	bool temp = checkFree;
	checkFree = true;
	return temp;
} // checkFreeOn

bool checkFreeOff() {
	bool temp = checkFree;
	checkFree = false;
	return temp;
} // checkFreeOff


// static bool traceHeapTerm = false;

// inline bool traceHeapTerm() {
// 	return traceHeapTerm;
// } // traceHeapTerm

// bool traceHeapTermOn() {
// 	bool temp = traceHeapTerm;
// 	traceHeapTerm = true;
// 	return temp;
// } // traceHeapTermOn

// bool traceHeapTermOff() {
// 	bool temp = traceHeapTerm;
// 	traceHeapTerm = false;
// 	return temp;
// } // traceHeapTermOff


enum {
	__CFA_DEFAULT_MMAP_START__ = (512 * 1024 + 1),
	__CFA_DEFAULT_HEAP_EXPANSION__ = (1 * 1024 * 1024),
};

// Weak definitions, so a program can supply its own versions to change the defaults.
size_t default_mmap_start() __attribute__(( weak )) {
	return __CFA_DEFAULT_MMAP_START__;
} // default_mmap_start

size_t default_heap_expansion() __attribute__(( weak )) {
	return __CFA_DEFAULT_HEAP_EXPANSION__;
} // default_heap_expansion


#ifdef __CFA_DEBUG__
static unsigned int allocFree;							// running total of allocations minus frees

static void checkUnfreed() {
	if ( allocFree != 0 ) {
		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
		// char helpText[512];
		// int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
		// 					"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
		// 					(long int)getpid(), allocFree, allocFree ); // always print the UNIX pid
		// __cfaabi_dbg_bits_write( helpText, len );
	} // if
} // checkUnfreed

extern "C" {
void heapAppStart() {									// called by __cfaabi_appready_startup
	allocFree = 0;
} // heapAppStart

void heapAppStop() {									// called by __cfaabi_appready_startdown
	fclose( stdin ); fclose( stdout );
	checkUnfreed();
} // heapAppStop
} // extern "C"
#endif // __CFA_DEBUG__

// statically allocated variables => zero filled.
static size_t pageSize;									// architecture pagesize
static size_t heapExpand;								// sbrk advance
static size_t mmapStart;								// cross over point for mmap
static unsigned int maxBucketsUsed;						// maximum number of buckets in use


// #comment TD : This defined is significantly different from the __ALIGN__ define from locks.hfa
#define ALIGN 16

#define SPINLOCK 0
#define LOCKFREE 1
#define BUCKETLOCK SPINLOCK
#if BUCKETLOCK == LOCKFREE
// NOTE(review): the header name is missing from this directive; presumably it is the header declaring the lock-free
// stack used by the LOCKFREE bucket variant. Restore it before selecting LOCKFREE.
#include
#endif // LOCKFREE

// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
// Break recursion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
enum { NoBucketSizes = 93 };							// number of buckets sizes

// Allocator state: one free list per bucket size plus the bounds of the sbrk-managed area.
struct HeapManager {
//	struct FreeHeader;									// forward declaration

	struct Storage {
		struct Header {									// header
			union Kind {
				struct RealHeader {
					union {
						struct {						// 4-byte word => 8-byte header, 8-byte word => 16-byte header
							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4

							union {
//								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
								void * home;			// allocated block points back to home locations (must overlay alignment)
								size_t blockSize;		// size for munmap (must overlay alignment)
								// The selector macro is BUCKETLOCK; the previous spelling named an undefined macro and
								// only picked this branch because undefined macros evaluate to 0 (== SPINLOCK).
								#if BUCKETLOCK == SPINLOCK
								Storage * next;			// freed block points next freed block of same size
								#endif // SPINLOCK
							};

							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
							#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
						};
						// future code
						#if BUCKETLOCK == LOCKFREE
						Stack::Link next;				// freed block points next freed block of same size (double-wide)
						#endif // LOCKFREE
					};
				} real; // RealHeader
				struct FakeHeader {
					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
					#endif // __ORDER_LITTLE_ENDIAN__

					uint32_t offset;

					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
					#endif // __ORDER_BIG_ENDIAN__
				} fake; // FakeHeader
			} kind; // Kind
		} header; // Header
		char pad[ALIGN - sizeof( Header )];
		char data[0];									// storage
	}; // Storage

	static_assert( ALIGN >= sizeof( Storage ), "ALIGN < sizeof( Storage )" );

	struct FreeHeader {
		#if BUCKETLOCK == SPINLOCK
		__spinlock_t lock;								// must be first field for alignment
		Storage * freeList;
		#elif BUCKETLOCK == LOCKFREE
		// future code
		StackLF freeList;
		#else
			#error undefined lock type for bucket lock
		#endif // SPINLOCK
		size_t blockSize;								// size of allocations on this list
	}; // FreeHeader

	// must be first fields for alignment
	__spinlock_t extlock;								// protects allocation-buffer extension
	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes

	void * heapBegin;									// start of heap
	void * heapEnd;										// logical end of heap
	size_t heapRemaining;								// amount of storage not allocated in the current chunk
}; // HeapManager

// Search key of a free list is the block size it serves.
static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }


#define FASTLOOKUP
#define __STATISTICS__

// Powers of 2 are common allocation sizes, so make powers of 2 generate the minimum required size.
static const unsigned int bucketSizes[] @= {			// different bucket sizes
	16, 32, 48, 64,
	64 + sizeof(HeapManager.Storage), 96, 112, 128,
	128 + sizeof(HeapManager.Storage), 160, 192, 224,
	256 + sizeof(HeapManager.Storage), 320, 384, 448,
	512 + sizeof(HeapManager.Storage), 640, 768, 896,
	1_024 + sizeof(HeapManager.Storage), 1_536,
	2_048 + sizeof(HeapManager.Storage), 2_560, 3_072, 3_584,
	4_096 + sizeof(HeapManager.Storage), 6_144,
	8_192 + sizeof(HeapManager.Storage), 9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360,
	16_384 + sizeof(HeapManager.Storage), 18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720,
	32_768 + sizeof(HeapManager.Storage), 36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440,
	65_536 + sizeof(HeapManager.Storage), 73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880,
	131_072 + sizeof(HeapManager.Storage), 147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760,
	262_144 + sizeof(HeapManager.Storage), 294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520,
	524_288 + sizeof(HeapManager.Storage), 655_360, 786_432, 917_504,
	1_048_576 + sizeof(HeapManager.Storage), 1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936, 1_835_008, 1_966_080,
	2_097_152 + sizeof(HeapManager.Storage), 2_621_440, 3_145_728, 3_670_016,
	4_194_304 + sizeof(HeapManager.Storage)
};
static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0]), "size of bucket array wrong" );

#ifdef FASTLOOKUP
enum { LookupSizes = 65_536 + sizeof(HeapManager.Storage) }; // number of fast lookup sizes
static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
#endif // FASTLOOKUP

static int mmapFd = -1;									// fake or actual fd for anonymous file


#ifdef __CFA_DEBUG__
static bool heapBoot = 0;								// detect recursion during boot
#endif // __CFA_DEBUG__
static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing


// Set the size at which requests switch from bucket (sbrk) storage to mmap.
// Returns true if value is rejected (smaller than a page or larger than the largest bucket), false once it is installed.
static inline bool setMmapStart( size_t value ) {
	if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
	mmapStart = value;									// set global

	// find the closest bucket size less than or equal to the mmapStart size
	maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
	return false;
} // setMmapStart


// Constructor: record the page size, size the buckets, build the small-size lookup table, and align the heap start.
static void ?{}( HeapManager & manager ) with ( manager ) {
	pageSize = sysconf( _SC_PAGESIZE );

	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
		freeLists[i].blockSize = bucketSizes[i];
	} // for

	#ifdef FASTLOOKUP
	unsigned int idx = 0;
	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
		if ( i > bucketSizes[idx] ) idx += 1;			// advance to the first bucket able to hold i bytes
		lookup[i] = idx;
	} // for
	#endif // FASTLOOKUP

	if ( setMmapStart( default_mmap_start() ) ) {
		abort( "HeapManager : internal error, mmap start initialization failure." );
	} // if
	heapExpand = default_heap_expansion();

	char * End = (char *)sbrk( 0 );
	sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
	heapBegin = heapEnd = sbrk( 0 );					// get new start point
} // HeapManager


static void ^?{}( HeapManager & ) {
	#ifdef __STATISTICS__
	// if ( traceHeapTerm() ) {
	// 	printStats();
	// 	if ( checkfree() ) checkFree( heapManager, true );
	// } // if
	#endif // __STATISTICS__
} // ~HeapManager


static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
void memory_startup( void ) {
	#ifdef __CFA_DEBUG__
	if ( unlikely( heapBoot ) ) {						// check for recursion during system boot
		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
		abort( "boot() : internal error, recursively invoked during system boot." );
	} // if
	heapBoot = true;
	#endif // __CFA_DEBUG__

	//assert( heapManager.heapBegin != 0 );
	//heapManager{};
	if ( heapManager.heapBegin == 0 ) heapManager{};	// skip if an earlier allocation already initialized the heap
} // memory_startup

static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
void memory_shutdown( void ) {
	^heapManager{};
} // memory_shutdown


#ifdef __STATISTICS__
static unsigned long long int mmap_storage;				// heap statistics counters
static unsigned int mmap_calls;
static unsigned long long int munmap_storage;
static unsigned int munmap_calls;
static unsigned long long int sbrk_storage;
static unsigned int sbrk_calls;
static unsigned long long int malloc_storage;
static unsigned int malloc_calls;
static unsigned long long int free_storage;
static unsigned int free_calls;
static unsigned long long int calloc_storage;
static unsigned int calloc_calls;
static unsigned long long int memalign_storage;
static unsigned int memalign_calls;
static unsigned long long int cmemalign_storage;
static unsigned int cmemalign_calls;
static unsigned long long int realloc_storage;
static unsigned int realloc_calls;

static int statfd;										// statistics file descriptor
// (changed by malloc_stats_fd)


// Use "write" because streams may be shutdown when calls are made.
static void printStats() {
	char helpText[512];
	__cfaabi_dbg_bits_print_buffer( helpText, sizeof(helpText),
			"\nHeap statistics:\n"
			" malloc: calls %u / storage %llu\n"
			" calloc: calls %u / storage %llu\n"
			" memalign: calls %u / storage %llu\n"
			" cmemalign: calls %u / storage %llu\n"
			" realloc: calls %u / storage %llu\n"
			" free: calls %u / storage %llu\n"
			" mmap: calls %u / storage %llu\n"
			" munmap: calls %u / storage %llu\n"
			" sbrk: calls %u / storage %llu\n",
			malloc_calls, malloc_storage,
			calloc_calls, calloc_storage,
			memalign_calls, memalign_storage,
			cmemalign_calls, cmemalign_storage,
			realloc_calls, realloc_storage,
			free_calls, free_storage,
			mmap_calls, mmap_storage,
			munmap_calls, munmap_storage,
			sbrk_calls, sbrk_storage
		);
} // printStats

// Write the statistics counters to stream as XML (see malloc_info). Returns the result of write(), -1 => error.
// NOTE(review): the element markup below was rebuilt to give each of the 18 arguments a conversion, following the
// layout of glibc's malloc_info output; confirm the exact tag text against the upstream source.
static int printStatsXML( FILE * stream ) {				// see malloc_info
	char helpText[1024];
	int len = snprintf( helpText, sizeof(helpText),
			"<malloc version=\"1\">\n"
			"<heap nr=\"0\">\n"
			"<sizes>\n"
			"</sizes>\n"
			"<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"mmap\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"munmap\" count=\"%u\" size=\"%llu\"/>\n"
			"<total type=\"sbrk\" count=\"%u\" size=\"%llu\"/>\n"
			"</malloc>",
			malloc_calls, malloc_storage,
			calloc_calls, calloc_storage,
			memalign_calls, memalign_storage,
			cmemalign_calls, cmemalign_storage,
			realloc_calls, realloc_storage,
			free_calls, free_storage,
			mmap_calls, mmap_storage,
			munmap_calls, munmap_storage,
			sbrk_calls, sbrk_storage
		);
	if ( len < 0 ) return -1;							// formatting failure
	// snprintf returns the untruncated length, so never hand write() more than the buffer holds.
	if ( (size_t)len >= sizeof(helpText) ) len = sizeof(helpText) - 1;
	return write( fileno( stream ), helpText, len );	// -1 => error
} // printStatsXML
#endif // __STATISTICS__


// #comment TD : Is this the same thing as Out-of-Memory?
static inline void noMemory() {
	abort( "Heap memory exhausted at %zu bytes.\n"
		   "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
		   ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
} // noMemory


// Abort unless alignment is a power of 2 and at least sizeof(void *).
static inline void checkAlign( size_t alignment ) {
	if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) {
		abort( "Alignment %zu for memory allocation is less than sizeof(void *) and/or not a power of 2.", alignment );
	} // if
} // checkAlign


// Set the amount by which the heap grows on each extension.
// Returns true if value is rejected (smaller than a page), false once it is installed.
static inline bool setHeapExpand( size_t value ) {
	if ( value < pageSize ) return true;				// validate the request, not the current setting
	heapExpand = value;
	return false;
} // setHeapExpand


// Abort with a diagnostic naming the operation when check is true, i.e., addr failed a heap-bounds test.
static inline void checkHeader( bool check, const char * name, void * addr ) {
	if ( unlikely( check ) ) {							// bad address ?
		abort( "Attempt to %s storage %p with address outside the heap.\n"
			   "Possible cause is duplicate free on same block or overwriting of memory.",
			   name, addr );
	} // if
} // checkHeader


// #comment TD : function should be commented and/or have a more evocative name
//               this isn't either a check or a constructor which is what I would expect this function to be
// If header is a fake (alignment) header, i.e., its alignment field has the low bit set, return the recorded alignment
// and step header back by the recorded offset to the real header; otherwise leave both untouched. size is not used.
static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & size, size_t & alignment ) {
	if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
		size_t offset = header->kind.fake.offset;
		alignment = header->kind.fake.alignment & -2;	// remove flag from value
		#ifdef __CFA_DEBUG__
		checkAlign( alignment );						// check alignment
		#endif // __CFA_DEBUG__
		header = (HeapManager.Storage.Header *)((char *)header - offset);
	} // if
} // fakeHeader


// #comment TD : Why is this a define
// Address of the header immediately preceding user address addr.
#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)(addr) - sizeof(HeapManager.Storage) ))

// Locate the real header for user address addr and compute the block size.
// Returns true for an mmapped block (size is the mapped size, freeElem is not set) and false for a bucket block
// (freeElem is its free list, size is the bucket size). alignment is only assigned when addr has a fake header.
static inline bool headers( const char * name, void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
	header = headerAddr( addr );

	if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
		fakeHeader( header, size, alignment );
		size = header->kind.real.blockSize & -3;		// mmap size
		return true;
	} // if

	#ifdef __CFA_DEBUG__
	checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
	#endif // __CFA_DEBUG__

	// #comment TD : This code looks weird...
	//               It's called as the first statement of both branches of the last if, with the same parameters in all cases

	// header may be safe to dereference
	fakeHeader( header, size, alignment );
	#ifdef __CFA_DEBUG__
	checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
	#endif // __CFA_DEBUG__

	freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3); // strip flag bit 2 (zero fill)
	#ifdef __CFA_DEBUG__
	if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
		abort( "Attempt to %s storage %p with corrupted header.\n"
			   "Possible cause is duplicate free on same block or overwriting of header information.",
			   name, addr );
	} // if
	#endif // __CFA_DEBUG__
	size = freeElem->blockSize;
	return false;
} // headers


// Carve size bytes off the end of the heap, growing the heap with sbrk when the current chunk is too small.
// Returns the start of the new block, or 0 with errno set to ENOMEM if sbrk fails.
static inline void * extend( size_t size ) with ( heapManager ) {
	lock( extlock __cfaabi_dbg_ctx2 );
	ptrdiff_t rem = heapRemaining - size;
	if ( rem < 0 ) {
		// If the size requested is bigger than the current remaining storage, increase the size of the heap.

		size_t increase = libCeiling( size > heapExpand ? size : heapExpand, libAlign() );
		if ( sbrk( increase ) == (void *)-1 ) {
			unlock( extlock );
			errno = ENOMEM;
			return 0;
		} // if
		#ifdef __STATISTICS__
		sbrk_calls += 1;								// protected by extlock
		sbrk_storage += increase;
		#endif // __STATISTICS__
		#ifdef __CFA_DEBUG__
		// Set new memory to garbage so subsequent uninitialized usages might fail.
		memset( (char *)heapEnd + heapRemaining, '\377', increase );
		#endif // __CFA_DEBUG__
		rem = heapRemaining + increase - size;
	} // if

	HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
	heapRemaining = rem;
	heapEnd = (char *)heapEnd + size;
	unlock( extlock );
	return block;
} // extend


// Binary search: index of the first element of vals[0..dim) that is not less than key (dim if there is none).
size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
	size_t l = 0, m, h = dim;
	while ( l < h ) {
		m = (l + h) / 2;
		if ( (unsigned int &)(vals[m]) < key ) {		// cast away const
			l = m + 1;
		} else {
			h = m;
		} // if
	} // while
	return l;
} // Bsearchl


// Allocate at least size user bytes, from a bucket free list / the sbrk area for small requests and from mmap for
// large ones. Returns the user address, or 0 if the request overflows or the heap cannot be extended.
static inline void * doMalloc( size_t size ) with ( heapManager ) {
	HeapManager.Storage * block;						// pointer to new block of storage

	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
	// along with the block and is a multiple of the alignment size.

	if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0;
	size_t tsize = size + sizeof(HeapManager.Storage);
	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
		size_t posn;
		#ifdef FASTLOOKUP
		if ( tsize < LookupSizes ) posn = lookup[tsize];
		else
		#endif // FASTLOOKUP
			posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed );
		HeapManager.FreeHeader * freeElem = &freeLists[posn];
		// #ifdef FASTLOOKUP
		// if ( tsize < LookupSizes )
		// 	freeElem = &freeLists[lookup[tsize]];
		// else
		// #endif // FASTLOOKUP
		// 	freeElem = bsearchl( tsize, freeLists, (size_t)maxBucketsUsed ); // binary search
		// HeapManager.FreeHeader * freeElem =
		// 	#ifdef FASTLOOKUP
		// 	tsize < LookupSizes ? &freeLists[lookup[tsize]] :
		// 	#endif // FASTLOOKUP
		// 	bsearchl( tsize, freeLists, (size_t)maxBucketsUsed ); // binary search
		assert( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
		assert( tsize <= freeElem->blockSize );			// search failure ?
		tsize = freeElem->blockSize;					// total space needed for request

		// Spin until the lock is acquired for this particular size of block.

		// Select on the configured lock kind; "defined( SPINLOCK )" is always true because both kinds are defined.
		#if BUCKETLOCK == SPINLOCK
		lock( freeElem->lock __cfaabi_dbg_ctx2 );
		block = freeElem->freeList;						// remove node from stack
		#else
		block = freeElem->freeList.pop();
		#endif // SPINLOCK
		if ( unlikely( block == 0 ) ) {					// no free block ?
			#if BUCKETLOCK == SPINLOCK
			unlock( freeElem->lock );
			#endif // SPINLOCK

			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
			// and then carve it off.

			block = (HeapManager.Storage *)extend( tsize ); // mutual exclusion on call
			if ( unlikely( block == 0 ) ) return 0;
		#if BUCKETLOCK == SPINLOCK
		} else {
			freeElem->freeList = block->header.kind.real.next;
			unlock( freeElem->lock );
		#endif // SPINLOCK
		} // if

		block->header.kind.real.home = freeElem;		// pointer back to free list of appropriate size
	} else {											// large size => mmap
		if ( unlikely( size > ~0ul - pageSize ) ) return 0;
		tsize = libCeiling( tsize, pageSize );			// must be multiple of page size
		#ifdef __STATISTICS__
		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__
		block = (HeapManager.Storage *)mmap( 0, tsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
		if ( block == (HeapManager.Storage *)MAP_FAILED ) {
			// Do not call strerror( errno ) as it may call malloc.
			abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu error:%d.", &heapManager, tsize, errno );
		} // if
		#ifdef __CFA_DEBUG__
		// Set new memory to garbage so subsequent uninitialized usages might fail.
		memset( block, '\377', tsize );
		#endif // __CFA_DEBUG__
		block->header.kind.real.blockSize = tsize;		// storage size for munmap
	} // if

	void * area = &(block->data);						// adjust off header to user bytes

	#ifdef __CFA_DEBUG__
	assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
	__atomic_add_fetch( &allocFree, tsize, __ATOMIC_SEQ_CST );
	if ( traceHeap() ) {
		enum { BufferSize = 64 };
		char helpText[BufferSize];
		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
		// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
		__cfaabi_dbg_bits_write( helpText, len );
	} // if
	#endif // __CFA_DEBUG__

	return area;
} // doMalloc


// Release the block at user address addr: munmap for mmapped blocks, otherwise push onto its bucket's free list.
static inline void doFree( void * addr ) with ( heapManager ) {
	#ifdef __CFA_DEBUG__
	if ( unlikely( heapManager.heapBegin == 0 ) ) {
		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
	} // if
	#endif // __CFA_DEBUG__

	HeapManager.Storage.Header * header;
	HeapManager.FreeHeader * freeElem;
	size_t size, alignment;								// not used (see realloc)

	if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
		#ifdef __STATISTICS__
		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__
		if ( munmap( header, size ) == -1 ) {
			#ifdef __CFA_DEBUG__
			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
				   "Possible cause is invalid pointer.",
				   addr );
			#endif // __CFA_DEBUG__
		} // if
	} else {
		#ifdef __CFA_DEBUG__
		// Set free memory to garbage so subsequent usages might fail.
		memset( ((HeapManager.Storage *)header)->data, '\377', freeElem->blockSize - sizeof( HeapManager.Storage ) );
		#endif // __CFA_DEBUG__

		#ifdef __STATISTICS__
		// Updated outside the bucket lock, so it must be atomic like the other counters.
		__atomic_add_fetch( &free_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__
		#if BUCKETLOCK == SPINLOCK
		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
		header->kind.real.next = freeElem->freeList;	// push on stack
		freeElem->freeList = (HeapManager.Storage *)header;
		unlock( freeElem->lock );						// release spin lock
		#else
		freeElem->freeList.push( *(HeapManager.Storage *)header );
		#endif // SPINLOCK
	} // if

	#ifdef __CFA_DEBUG__
	__atomic_add_fetch( &allocFree, -size, __ATOMIC_SEQ_CST );
	if ( traceHeap() ) {
		enum { BufferSize = 64 };
		char helpText[BufferSize];
		int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
		__cfaabi_dbg_bits_write( helpText, len );
	} // if
	#endif // __CFA_DEBUG__
} // doFree


// Walk the free lists of the buckets in use, printing per-bucket counts when statistics are enabled.
// Returns the size of the sbrk area minus the total bytes sitting on free lists.
size_t checkFree( HeapManager & manager ) with ( manager ) {
	size_t total = 0;
	#ifdef __STATISTICS__
	__cfaabi_dbg_bits_acquire();
	__cfaabi_dbg_bits_print_nolock( "\nBin lists (bin size : free blocks on list)\n" );
	#endif // __STATISTICS__
	for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
		size_t size = freeLists[i].blockSize;
		#ifdef __STATISTICS__
		unsigned int N = 0;
		#endif // __STATISTICS__

		#if BUCKETLOCK == SPINLOCK
		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
		#else
		for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0; p = p->header.kind.real.next.top ) {
		#endif // SPINLOCK
			total += size;
			#ifdef __STATISTICS__
			N += 1;
			#endif // __STATISTICS__
		} // for

		#ifdef __STATISTICS__
		__cfaabi_dbg_bits_print_nolock( "%7zu, %-7u ", size, N );
		if ( (i + 1) % 8 == 0 ) __cfaabi_dbg_bits_print_nolock( "\n" );
		#endif // __STATISTICS__
	} // for
	#ifdef __STATISTICS__
	__cfaabi_dbg_bits_print_nolock( "\ntotal free blocks:%zu\n", total );
	__cfaabi_dbg_bits_release();
	#endif // __STATISTICS__
	return (char *)heapEnd - (char *)heapBegin - total;
} // checkFree


static
inline void * mallocNoStats( size_t size ) {			// necessary for malloc statistics
	// Allocate size bytes without touching the malloc statistics counters.
	// Returns the user address, or 0 with errno set to ENOMEM.
	//assert( heapManager.heapBegin != 0 );
	if ( unlikely( heapManager.heapBegin == 0 ) ) heapManager{}; // called before memory_startup ?
	void * area = doMalloc( size );
	if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
	return area;
} // mallocNoStats


// Allocate size bytes whose address is a multiple of alignment, without touching the memalign statistics counters.
// Returns the user address, or 0 with errno set to ENOMEM.
static inline void * memalignNoStats( size_t alignment, size_t size ) { // necessary for malloc statistics
	#ifdef __CFA_DEBUG__
	checkAlign( alignment );							// check alignment
	#endif // __CFA_DEBUG__

	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
	if ( unlikely( alignment <= libAlign() ) ) return mallocNoStats( size );

	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
	// administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
	//      .-------------v-----------------v----------------v----------,
	//      | Real Header | ... padding ... |   Fake Header  | data ... |
	//      `-------------^-----------------^-+--------------^----------'
	//      |<--------------------------------' offset/align |<-- alignment boundary

	// The padded request below must not wrap around, otherwise a too-small block would be returned.
	if ( unlikely( alignment > ~0ul - sizeof(HeapManager.Storage) || size > ~0ul - sizeof(HeapManager.Storage) - alignment ) ) {
		errno = ENOMEM;
		return 0;
	} // if

	// subtract libAlign() because it is already the minimum alignment
	// add sizeof(Storage) for fake header
	// Go through mallocNoStats (not doMalloc directly) so the heap is initialized on a first call made before
	// memory_startup, and errno is set on failure.
	char * area = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
	if ( unlikely( area == 0 ) ) return area;

	// address in the block of the "next" alignment address
	char * user = (char *)libCeiling( (uintptr_t)(area + sizeof(HeapManager.Storage)), alignment );

	// address of header from malloc
	HeapManager.Storage.Header * realHeader = headerAddr( area );
	// address of fake header * before* the alignment location
	HeapManager.Storage.Header * fakeHeader = headerAddr( user );
	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
	// SKULLDUGGERY: odd alignment implies fake header
	fakeHeader->kind.fake.alignment = alignment | 1;

	return user;
} // memalignNoStats


// supported mallopt options
#ifndef M_MMAP_THRESHOLD
#define M_MMAP_THRESHOLD (-1)
#endif // M_MMAP_THRESHOLD
#ifndef M_TOP_PAD
#define M_TOP_PAD (-2)
#endif // M_TOP_PAD


extern "C" {
// The malloc() function allocates size bytes and returns a pointer to the allocated memory. The memory is not
// initialized. If size is 0, then malloc() returns either NULL, or a unique pointer value that can later be
// successfully passed to free().
void * malloc( size_t size ) {
	#ifdef __STATISTICS__
	__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
	__atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	return mallocNoStats( size );
} // malloc

// The calloc() function allocates memory for an array of nmemb elements of size bytes each and returns a pointer to
// the allocated memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either NULL, or a
// unique pointer value that can later be successfully passed to free().
void * calloc( size_t noOfElems, size_t elemSize ) {
	// A wrapped product would silently allocate fewer bytes than the caller expects to index.
	if ( unlikely( elemSize != 0 && noOfElems > (size_t)-1 / elemSize ) ) {
		errno = ENOMEM;
		return 0;
	} // if
	size_t size = noOfElems * elemSize;
	#ifdef __STATISTICS__
	__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
	__atomic_add_fetch( &calloc_storage, size, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	char * area = (char *)mallocNoStats( size );
	if ( unlikely( area == 0 ) ) return 0;

	HeapManager.Storage.Header * header;
	HeapManager.FreeHeader * freeElem;
	size_t asize, alignment;
	bool mapped __attribute__(( unused )) = headers( "calloc", area, header, freeElem, asize, alignment );
	#ifndef __CFA_DEBUG__
	// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
	if ( ! mapped )
	#endif // __CFA_DEBUG__
		memset( area, '\0', asize - sizeof(HeapManager.Storage) ); // set to zeros (whole block, not just size)

	header->kind.real.blockSize |= 2;					// mark as zero filled
	return area;
} // calloc

// #comment TD : Document this function
// The cmemalign() function is calloc() with an alignment: it allocates zero-filled storage for noOfElems elements of
// elemSize bytes each at an address that is a multiple of alignment.
void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
	// A wrapped product would silently allocate fewer bytes than the caller expects to index.
	if ( unlikely( elemSize != 0 && noOfElems > (size_t)-1 / elemSize ) ) {
		errno = ENOMEM;
		return 0;
	} // if
	size_t size = noOfElems * elemSize;
	#ifdef __STATISTICS__
	__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
	__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	char * area = (char *)memalignNoStats( alignment, size );
	if ( unlikely( area == 0 ) ) return 0;

	HeapManager.Storage.Header * header;
	HeapManager.FreeHeader * freeElem;
	size_t asize;
	bool mapped __attribute__(( unused )) = headers( "cmemalign", area, header, freeElem, asize, alignment );
	#ifndef __CFA_DEBUG__
	// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
	if ( ! mapped )
	#endif // __CFA_DEBUG__
		memset( area, '\0', asize - ( (char *)area - (char *)header ) ); // set to zeros (from user address to block end)

	header->kind.real.blockSize |= 2;					// mark as zero filled
	return area;
} // cmemalign

// The realloc() function changes the size of the memory block pointed to by ptr to size bytes.
// The contents will be unchanged in the range from the start of the region up to the minimum of the old and new
// sizes. If the new size is larger than the old size, the added memory will not be initialized. If ptr is NULL, then
// the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not NULL, then
// the call is equivalent to free(ptr). Unless ptr is NULL, it must have been returned by an earlier call to malloc(),
// calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
void * realloc( void * addr, size_t size ) {
	#ifdef __STATISTICS__
	__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	// Degenerate requests: a null address is a plain allocation, a zero size is a plain deallocation.
	if ( unlikely( addr == 0 ) ) return mallocNoStats( size );
	if ( unlikely( size == 0 ) ) {
		free( addr );
		return 0;
	} // if

	HeapManager.Storage.Header * oldHeader;
	HeapManager.FreeHeader * bucket;
	size_t oldBlock, align = 0;							// align stays 0 unless addr carries a fake (alignment) header
	headers( "realloc", addr, oldHeader, bucket, oldBlock, align );

	// user bytes available in the existing block, measured from the user address to the end of the block
	size_t oldUser = oldBlock - ( (char *)addr - (char *)oldHeader );
	// Existing block is already large enough. No new profiler entry results because the previous one still exists and
	// must match with the free for this memory, so this realloc does not appear in the profiler output.
	if ( size <= oldUser ) return addr;

	#ifdef __STATISTICS__
	__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	// Preserve the alignment of the previous request, if it had one.
	void * area = unlikely( align != 0 ) ? memalign( align, size ) : mallocNoStats( size );
	if ( unlikely( area == 0 ) ) return 0;

	if ( unlikely( oldHeader->kind.real.blockSize & 2 ) ) { // old block zero filled (calloc/cmemalign) ?
		assert( (oldHeader->kind.real.blockSize & 1) == 0 );
		HeapManager.Storage.Header * newHeader;
		size_t newBlock;
		bool mapped __attribute__(( unused )) = headers( "realloc", area, newHeader, bucket, newBlock, align );
		#ifndef __CFA_DEBUG__
		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
		if ( ! mapped )
		#endif // __CFA_DEBUG__
			// zero everything past the bytes about to be copied
			memset( (char *)area + oldUser, '\0', newBlock - ( (char *)area - (char *)newHeader ) - oldUser );
		newHeader->kind.real.blockSize |= 2;			// new block inherits the zero-fill mark
	} // if

	memcpy( area, addr, oldUser );						// carry the old contents across
	free( addr );
	return area;
} // realloc

// The obsolete function memalign() allocates size bytes and returns a pointer to the allocated memory. The memory
// address will be a multiple of alignment, which must be a power of two.
void * memalign( size_t alignment, size_t size ) {
	#ifdef __STATISTICS__
	__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
	__atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	return memalignNoStats( alignment, size );
} // memalign

// The function aligned_alloc() is the same as memalign(), except for the added restriction that size should be a
// multiple of alignment.
void * aligned_alloc( size_t alignment, size_t size ) {
	return memalign( alignment, size );
} // aligned_alloc

// The function posix_memalign() allocates size bytes and places the address of the allocated memory in *memptr. The
// address of the allocated memory will be a multiple of alignment, which must be a power of two and a multiple of
// sizeof(void *). If size is 0, then posix_memalign() returns either NULL, or a unique pointer value that can later
// be successfully passed to free(3).
int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
	// alignment must be at least pointer sized and a power of 2
	if ( ! ( alignment >= sizeof(void *) && libPow2( alignment ) ) ) return EINVAL;
	void * area = memalign( alignment, size );
	* memptr = area;									// stored even on failure
	return unlikely( area == 0 ) ? ENOMEM : 0;
} // posix_memalign

// The obsolete function valloc() allocates size bytes and returns a pointer to the allocated memory. The memory
// address will be a multiple of the page size. It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
void * valloc( size_t size ) {
	return memalign( pageSize, size );
} // valloc

// The free() function frees the memory space pointed to by ptr, which must have been returned by a previous call to
// malloc(), calloc() or realloc(). Otherwise, or if free(ptr) has already been called before, undefined behavior
// occurs. If ptr is NULL, no operation is performed.
void free( void * addr ) {
	#ifdef __STATISTICS__
	__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
	#endif // __STATISTICS__

	if ( likely( addr != 0 ) ) {						// normal case
		doFree( addr );
		return;
	} // if

	// free( 0 ) does nothing apart from the optional trace line.
	#ifdef __CFA_DEBUG__
	if ( traceHeap() ) {
		#define nullmsg "Free( 0x0 ) size:0\n"
		// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
		__cfaabi_dbg_bits_write( nullmsg, sizeof(nullmsg) - 1 );
	} // if
	#endif // __CFA_DEBUG__
} // free

// The mallopt() function adjusts parameters that control the behavior of the memory-allocation functions (see
// malloc(3)). The param argument specifies the parameter to be modified, and value specifies the new value for that
// parameter.
// Dispatches on option: M_TOP_PAD forwards value to setHeapExpand(), M_MMAP_THRESHOLD forwards it to
// setMmapStart(). Returns 1 when the selected setter yields true or when option is not one of the two handled
// cases; returns 0 when the selected setter yields false.
int mallopt( int option, int value ) {
	// A choose clause does not fall into the next clause on its own; "fallthru default" transfers to the default clause.
	choose( option ) {
	  case M_TOP_PAD:
		if ( setHeapExpand( value ) ) fallthru default;
	  case M_MMAP_THRESHOLD:
		if ( setMmapStart( value ) ) fallthru default;
	  default:
		// #comment TD : 1 for unsupported feels wrong
		return 1;										// success, or unsupported
	} // choose
	return 0;											// error
} // mallopt


// The malloc_trim() function attempts to release free memory at the top of the heap (by calling sbrk(2) with a
// suitable argument).
//
// This implementation never releases memory: the argument is ignored and 0 is always returned.
int malloc_trim( size_t ) {
	return 0;											// => impossible to release memory
} // malloc_trim


// The malloc_usable_size() function returns the number of usable bytes in the block pointed to by ptr, a pointer to
// a block of memory allocated by malloc(3) or a related function.
//
// A null address yields 0. Otherwise the block size reported by headers() is reduced by the distance between the
// header and the user address.
size_t malloc_usable_size( void * addr ) {
	if ( unlikely( addr == 0 ) ) return 0;				// null allocation has 0 size

	HeapManager.Storage.Header * header;
	HeapManager.FreeHeader * freeElem;
	size_t size, alignment;

	// Only header and size are used afterwards; freeElem and alignment are required by the call signature.
	headers( "malloc_usable_size", addr, header, freeElem, size, alignment );
	size_t usize = size - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
	return usize;
} // malloc_usable_size


// The malloc_alignment() function returns the alignment of the allocation.
//
// A null address, or an allocation whose header is not flagged as fake, reports libAlign(). For a fake header the
// stored alignment is returned with its low-order flag bit cleared.
size_t malloc_alignment( void * addr ) {
	if ( unlikely( addr == 0 ) ) return libAlign();		// minimum alignment

	HeapManager.Storage.Header * header = headerAddr( addr );
	if ( (header->kind.fake.alignment & 1) == 1 ) {		// fake header ?
		return header->kind.fake.alignment & -2;		// remove flag from value
	} else {
		return libAlign();								// minimum alignment
	} // if
} // malloc_alignment


// The malloc_zero_fill() function returns true if the allocation is zero filled, i.e., initially allocated by calloc().
//
// A null address is never zero filled. If the header in front of addr is flagged as fake, the real header is located
// by stepping back kind.fake.offset bytes; the result is bit 2 of the real header's blockSize field.
bool malloc_zero_fill( void * addr ) {
	if ( unlikely( addr == 0 ) ) return false;			// null allocation is not zero fill

	HeapManager.Storage.Header * header = headerAddr( addr );
	if ( (header->kind.fake.alignment & 1) == 1 ) {		// fake header ?
		header = (HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset);
	} // if
	return (header->kind.real.blockSize & 2) != 0;		// zero filled (calloc/cmemalign) ?
} // malloc_zero_fill


// The malloc_stats() function prints (on default standard error) statistics about memory allocated by malloc(3) and
// related functions.
//
// Without __STATISTICS__ this routine does nothing. With it, printStats() is called and, when checkFree() is
// enabled, checkFree( heapManager ) is run afterwards.
void malloc_stats( void ) {
#ifdef __STATISTICS__
	printStats();
	if ( checkFree() ) checkFree( heapManager );
#endif // __STATISTICS__
} // malloc_stats


// The malloc_stats_fd() function changes the file descriptor where malloc_stats() writes the statistics.
//
// With __STATISTICS__ the previous value of statfd is returned and statfd is replaced by fd; otherwise -1 is
// returned and fd is ignored.
int malloc_stats_fd( int fd ) {
#ifdef __STATISTICS__
	int temp = statfd;
	statfd = fd;
	return temp;
#else
	return -1;
#endif // __STATISTICS__
} // malloc_stats_fd


// The malloc_info() function exports an XML string that describes the current state of the memory-allocation
// implementation in the caller. The string is printed on the file stream stream. The exported string includes
// information about all arenas (see malloc(3)).
//
// The options argument must be zero: any other value is rejected with -1 and errno set to EINVAL, and nothing is
// written to stream. Otherwise the result of printStatsXML( stream ) is returned.
int malloc_info( int options, FILE * stream ) {
	if ( unlikely( options != 0 ) ) {					// no options are defined
		errno = EINVAL;
		return -1;
	} // if
	return printStatsXML( stream );
} // malloc_info


// The malloc_get_state() function records the current state of all malloc(3) internal bookkeeping variables (but
// not the actual contents of the heap or the state of malloc_hook(3) functions pointers). The state is recorded in
// a system-dependent opaque data structure dynamically allocated via malloc(3), and a pointer to that data
// structure is returned as the function result. (It is the caller's responsibility to free(3) this memory.)
//
// This implementation records nothing and always returns a null pointer.
void * malloc_get_state( void ) {
	return 0;											// unsupported
} // malloc_get_state


// The malloc_set_state() function restores the state of all malloc(3) internal bookkeeping variables to the values
// recorded in the opaque data structure pointed to by state.
// Stub implementation: no allocator state is restored. The ptr argument is ignored and 0 is always returned.
int malloc_set_state( void * ptr ) {
	return 0;											// unsupported
} // malloc_set_state
} // extern "C"


// Local Variables: //
// tab-width: 4 //
// compile-command: "cfa -nodebug -O2 heap.cfa" //
// End: //