- Timestamp: Oct 19, 2022, 4:43:26 PM
- Branches: ADT, ast-experimental, master
- Children: 1a45263
- Parents: 9cd5bd2 (diff), 135143ba (diff)

Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.

- Location: libcfa/src
- Files: 27 edited
libcfa/src/assert.cfa
r9cd5bd2 rdf6cc9d 25 25 26 26 #define CFA_ASSERT_FMT "Cforall Assertion error \"%s\" from program \"%s\" in \"%s\" at line %d in file \"%s\"" 27 #define CFA_WARNING_FMT "Cforall Assertion warning \"%s\" from program \"%s\" in \"%s\" at line %d in file \"%s\"" 27 28 28 29 // called by macro assert in assert.h … … 48 49 abort(); 49 50 } 51 52 // called by macro warnf 53 // would be cool to remove libcfa_public but it's needed for libcfathread 54 void __assert_warn_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) libcfa_public { 55 __cfaabi_bits_acquire(); 56 __cfaabi_bits_print_nolock( STDERR_FILENO, CFA_WARNING_FMT ": ", assertion, __progname, function, line, file ); 57 58 va_list args; 59 va_start( args, fmt ); 60 __cfaabi_bits_print_vararg( STDERR_FILENO, fmt, args ); 61 va_end( args ); 62 63 __cfaabi_bits_print_nolock( STDERR_FILENO, "\n" ); 64 __cfaabi_bits_release(); 65 } 50 66 } 51 67 -
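Note: the hunk above adds __assert_warn_f, which mirrors __assert_fail_f but reports the diagnostic and continues instead of aborting. The warnf macro that calls it is not part of this hunk, so the plain-C sketch below uses hypothetical names (warn_f, warnf) purely to illustrate how such a macro typically forwards the failing expression and call-site information:

#include <stdarg.h>
#include <stdio.h>

/* Hypothetical stand-in for __assert_warn_f: report the diagnostic but keep running. */
static void warn_f( const char cond[], const char file[], unsigned int line,
                    const char func[], const char fmt[], ... ) {
	fprintf( stderr, "Warning \"%s\" in \"%s\" at line %u in file \"%s\": ", cond, func, line, file );
	va_list args;
	va_start( args, fmt );
	vfprintf( stderr, fmt, args );
	va_end( args );
	fputc( '\n', stderr );
}

/* Assumed shape of a warnf-style macro: forward the expression text and call-site info. */
#define warnf( cond, fmt, ... ) \
	( (cond) ? (void)0 : warn_f( #cond, __FILE__, __LINE__, __func__, fmt, ##__VA_ARGS__ ) )

int main( void ) {
	int queued = 5, cap = 4;
	warnf( queued <= cap, "queue over capacity: %d > %d", queued, cap );	// warns and continues
	return 0;
}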
libcfa/src/bitmanip.hfa
r9cd5bd2	rdf6cc9d
 11  11 	// Created On : Sat Mar 14 18:12:27 2020
 12  12 	// Last Modified By : Peter A. Buhr
 13      	// Last Modified On : Sun Aug 23 21:39:28 2020
 14      	// Update Count : 140
     13 	// Last Modified On : Sat Oct 8 08:28:15 2022
     14 	// Update Count : 142
 15  15 	//
 16  16 	
 …   …
 21  21 	// Bits are numbered 1-N.
 22  22 	
 23      	#include <assert.h>
 24      	
 25  23 	#define __bitsizeof( n ) (sizeof(n) * __CHAR_BIT__)
 26  24 	
 27      	static inline {
     25 	static inline __attribute__((always_inline)) {
 28  26 	// Count leading 0 bits.
 29  27 	unsigned int leading0s( unsigned char n ) { return n != 0 ? __builtin_clz( n ) - (__bitsizeof(unsigned int) - __bitsizeof(n)) : __bitsizeof(n); }
libcfa/src/bits/locks.hfa
r9cd5bd2 rdf6cc9d 13 13 // Created On : Tue Oct 31 15:14:38 2017 14 14 // Last Modified By : Peter A. Buhr 15 // Last Modified On : Mon Sep 19 18:51:53202216 // Update Count : 1 715 // Last Modified On : Tue Sep 20 22:09:50 2022 16 // Update Count : 18 17 17 // 18 18 … … 64 64 #ifndef NOEXPBACK 65 65 // exponential spin 66 for ( volatile unsigned int s; 0 ~ spin ) Pause();66 for ( volatile unsigned int s; 0 ~ spin ) Pause(); 67 67 68 68 // slowly increase by powers of 2 -
libcfa/src/clock.hfa
r9cd5bd2	rdf6cc9d
 13  13 	// Update Count : 28
 14  14 	//
     15 	
     16 	#pragma once
 15  17 	
 16  18 	#include <time.hfa>
libcfa/src/common.hfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Wed Jul 11 17:54:36 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed May 5 14:02:04 202113 // Update Count : 1812 // Last Modified On : Sat Oct 8 08:32:57 2022 13 // Update Count : 23 14 14 // 15 15 … … 31 31 long long int llabs( long long int ); 32 32 } // extern "C" 33 34 static inline { 33 static inline __attribute__((always_inline)) { 35 34 unsigned char abs( signed char v ) { return abs( (int)v ); } 36 35 // use default C routine for int … … 44 43 long double fabsl( long double ); 45 44 } // extern "C" 46 static inline {45 static inline __attribute__((always_inline)) { 47 46 float abs( float x ) { return fabsf( x ); } 48 47 double abs( double x ) { return fabs( x ); } … … 55 54 long double cabsl( long double _Complex ); 56 55 } // extern "C" 57 static inline {56 static inline __attribute__((always_inline)) { 58 57 float abs( float _Complex x ) { return cabsf( x ); } 59 58 double abs( double _Complex x ) { return cabs( x ); } … … 66 65 //--------------------------------------- 67 66 68 static inline {67 static inline __attribute__((always_inline)) { 69 68 char min( char v1, char v2 ) { return v1 < v2 ? v1 : v2; } // optimization 70 69 int min( int v1, int v2 ) { return v1 < v2 ? v1 : v2; } … … 74 73 long long int min( long long int v1, long long int v2 ) { return v1 < v2 ? v1 : v2; } 75 74 unsigned long long int min( unsigned long long int v1, unsigned int v2 ) { return v1 < v2 ? v1 : v2; } 76 forall( T | { int ?<?( T, T ); } ) 75 forall( T | { int ?<?( T, T ); } ) // generic 77 76 T min( T v1, T v2 ) { return v1 < v2 ? v1 : v2; } 78 77 … … 84 83 long long int max( long long int v1, long long int v2 ) { return v1 > v2 ? v1 : v2; } 85 84 unsigned long long int max( unsigned long long int v1, unsigned long long int v2 ) { return v1 > v2 ? v1 : v2; } 86 forall( T | { int ?>?( T, T ); } ) 85 forall( T | { int ?>?( T, T ); } ) // generic 87 86 T max( T v1, T v2 ) { return v1 > v2 ? v1 : v2; } 88 87 -
libcfa/src/concurrency/alarm.cfa
r9cd5bd2 rdf6cc9d 55 55 this.period = period; 56 56 this.thrd = thrd; 57 this. timeval= __kernel_get_time() + alarm;57 this.deadline = __kernel_get_time() + alarm; 58 58 set = false; 59 59 type = User; … … 64 64 this.period = period; 65 65 this.proc = proc; 66 this. timeval= __kernel_get_time() + alarm;66 this.deadline = __kernel_get_time() + alarm; 67 67 set = false; 68 68 type = Kernel; … … 72 72 this.initial = alarm; 73 73 this.period = period; 74 this. timeval= __kernel_get_time() + alarm;74 this.deadline = __kernel_get_time() + alarm; 75 75 set = false; 76 76 type = Callback; … … 85 85 void insert( alarm_list_t * this, alarm_node_t * n ) { 86 86 alarm_node_t * it = & (*this)`first; 87 while( it && (n-> timeval > it->timeval) ) {87 while( it && (n->deadline > it->deadline) ) { 88 88 it = & (*it)`next; 89 89 } … … 116 116 117 117 Time curr = __kernel_get_time(); 118 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this-> timeval.tn );118 __cfadbg_print_safe( preemption, " KERNEL: alarm inserting %p (%lu -> %lu).\n", this, curr.tn, this->deadline.tn ); 119 119 insert( &alarms, this ); 120 __kernel_set_timer( this-> timeval- curr);120 __kernel_set_timer( this->deadline - curr); 121 121 this->set = true; 122 122 } -
libcfa/src/concurrency/alarm.hfa
r9cd5bd2	rdf6cc9d
 57  57 	};
 58  58 	
 59      	Time timeval;	// actual time at which the alarm goes off
     59 	Time deadline;	// actual time at which the alarm goes off
 60  60 	enum alarm_type type;	// true if this is not a user defined alarm
 61  61 	bool set :1;	// whether or not the alarm has be registered
libcfa/src/concurrency/io.cfa
r9cd5bd2 rdf6cc9d 201 201 __atomic_unlock(&ctx->cq.lock); 202 202 203 touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next );203 touch_tsc( cltr->sched.io.tscs, ctx->cq.id, ts_prev, ts_next, false ); 204 204 205 205 return true; 206 206 } 207 207 208 bool __cfa_io_drain( processor * proc ) {208 bool __cfa_io_drain( struct processor * proc ) { 209 209 bool local = false; 210 210 bool remote = false; … … 243 243 /* paranoid */ verify( io.tscs[target].t.tv != ULLONG_MAX ); 244 244 HELP: if(target < ctxs_count) { 245 const unsigned long long cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io);246 const unsigned long long age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma);245 const __readyQ_avg_t cutoff = calc_cutoff(ctsc, ctx->cq.id, ctxs_count, io.data, io.tscs, __shard_factor.io, false); 246 const __readyQ_avg_t age = moving_average(ctsc, io.tscs[target].t.tv, io.tscs[target].t.ma, false); 247 247 __cfadbg_print_safe(io, "Kernel I/O: Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, ctx->cq.id, age, cutoff, age > cutoff ? "yes" : "no"); 248 248 if(age <= cutoff) break HELP; … … 273 273 } 274 274 275 bool __cfa_io_flush( processor * proc ) {275 bool __cfa_io_flush( struct processor * proc ) { 276 276 /* paranoid */ verify( ! __preemption_enabled() ); 277 277 /* paranoid */ verify( proc ); … … 353 353 354 354 disable_interrupts(); 355 processor * proc = __cfaabi_tls.this_processor;355 struct processor * proc = __cfaabi_tls.this_processor; 356 356 io_context$ * ctx = proc->io.ctx; 357 357 /* paranoid */ verify( __cfaabi_tls.this_processor ); … … 433 433 disable_interrupts(); 434 434 __STATS__( true, if(!lazy) io.submit.eagr += 1; ) 435 processor * proc = __cfaabi_tls.this_processor;435 struct processor * proc = __cfaabi_tls.this_processor; 436 436 io_context$ * ctx = proc->io.ctx; 437 437 /* paranoid */ verify( __cfaabi_tls.this_processor ); … … 551 551 enqueue(this.pending, (__outstanding_io&)pa); 552 552 553 wait( pa. sem);553 wait( pa.waitctx ); 554 554 555 555 return pa.ctx; … … 578 578 pa.ctx = ctx; 579 579 580 post( pa. sem);580 post( pa.waitctx ); 581 581 } 582 582 … … 613 613 } 614 614 615 wait( ei. sem);615 wait( ei.waitctx ); 616 616 617 617 __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have); … … 631 631 __submit_only(&ctx, ei.idxs, ei.have); 632 632 633 post( ei. sem);633 post( ei.waitctx ); 634 634 } 635 635 … … 641 641 642 642 #if defined(CFA_WITH_IO_URING_IDLE) 643 bool __kernel_read( processor * proc, io_future_t & future, iovec & iov, int fd) {643 bool __kernel_read(struct processor * proc, io_future_t & future, iovec & iov, int fd) { 644 644 io_context$ * ctx = proc->io.ctx; 645 645 /* paranoid */ verify( ! __preemption_enabled() ); … … 692 692 } 693 693 694 void __cfa_io_idle( processor * proc ) {694 void __cfa_io_idle( struct processor * proc ) { 695 695 iovec iov; 696 696 __atomic_acquire( &proc->io.ctx->cq.lock ); -
libcfa/src/concurrency/io/types.hfa
r9cd5bd2	rdf6cc9d
 107 107	struct __outstanding_io {
 108 108		inline Colable;
 109     		single_sem sem;
     109		oneshot waitctx;
 110 110	};
 111 111	static inline __outstanding_io *& Next( __outstanding_io * n ) { return (__outstanding_io *)Next( (Colable *)n ); }
 …   …
 127 127	struct __attribute__((aligned(64))) io_context$ {
 128 128		io_arbiter$ * arbiter;
 129     		processor * proc;
     129		struct processor * proc;
 130 130	
 131 131		__outstanding_io_queue ext_sq;
libcfa/src/concurrency/kernel.hfa
r9cd5bd2 rdf6cc9d 136 136 137 137 // Link lists fields 138 inline dlink(processor);138 dlink(processor) link; 139 139 140 140 // special init fields … … 158 158 #endif 159 159 }; 160 P9_EMBEDDED( processor, dlink(processor) ) 160 // P9_EMBEDDED( processor, dlink(processor) ) 161 static inline tytagref( dlink(processor), dlink(processor) ) ?`inner( processor & this ) { 162 dlink(processor) & b = this.link; 163 tytagref( dlink(processor), dlink(processor) ) result = { b }; 164 return result; 165 } 161 166 162 167 void ?{}(processor & this, const char name[], struct cluster & cltr); … … 176 181 177 182 // Aligned timestamps which are used by the ready queue and io subsystem 178 union __attribute__((aligned(64))) __timestamp_t { 179 struct { 180 volatile unsigned long long tv; 181 volatile unsigned long long ma; 182 } t; 183 char __padding[192]; 184 }; 185 186 static inline void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 187 static inline void ^?{}(__timestamp_t &) {} 183 union __attribute__((aligned(64))) __timestamp_t; 184 185 void ?{}(__timestamp_t & this); 186 void ^?{}(__timestamp_t &); 188 187 189 188 -
libcfa/src/concurrency/kernel/cluster.cfa
r9cd5bd2 rdf6cc9d 221 221 static const unsigned __readyq_single_shard = 2; 222 222 223 void ?{}(__timestamp_t & this) { this.t.tv = 0; this.t.ma = 0; } 224 void ^?{}(__timestamp_t &) {} 225 223 226 //----------------------------------------------------------------------- 224 227 // Check that all the intrusive queues in the data structure are still consistent … … 254 257 } 255 258 256 static void assign_list(unsigned & valrq, unsigned & valio, dlist( processor) & list, unsigned count) {257 processor * it = &list`first;259 static void assign_list(unsigned & valrq, unsigned & valio, dlist(struct processor) & list, unsigned count) { 260 struct processor * it = &list`first; 258 261 for(unsigned i = 0; i < count; i++) { 259 262 /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count); … … 278 281 279 282 #if defined(CFA_HAVE_LINUX_IO_URING_H) 280 static void assign_io(io_context$ ** data, size_t count, dlist( processor) & list) {281 processor * it = &list`first;283 static void assign_io(io_context$ ** data, size_t count, dlist(struct processor) & list) { 284 struct processor * it = &list`first; 282 285 while(it) { 283 286 /* paranoid */ verifyf( it, "Unexpected null iterator\n"); -
libcfa/src/concurrency/kernel/cluster.hfa
r9cd5bd2 rdf6cc9d 18 18 #include "device/cpu.hfa" 19 19 #include "kernel/private.hfa" 20 #include "math.hfa" 20 21 21 22 #include <limits.h> 23 #include <inttypes.h> 24 25 #include "clock.hfa" 26 27 #if defined(READYQ_USE_LINEAR_AVG) 28 29 // no conversion needed in this case 30 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return intsc; } 31 32 // warn normally all ints 33 #define warn_large_before warnf( !strict || old_avg < 33_000_000_000, "Suspiciously large previous average: %'llu (%llx), %'" PRId64 "ms \n", old_avg, old_avg, program()`ms ) 34 #define warn_large_after warnf( !strict || ret < 33_000_000_000, "Suspiciously large new average after %'" PRId64 "ms cputime: %'llu (%llx) from %'llu-%'llu (%'llu, %'llu) and %'llu\n", program()`ms, ret, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ) 35 36 // 8X linear factor is just 8 * x 37 #define AVG_FACTOR( x ) (8 * (x)) 38 39 #elif defined(READYQ_USE_LOGDBL_AVG) 40 41 // convert to log2 scale but using double 42 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { if(unlikely(0 == intsc)) return 0.0; else return log2(intsc); } 43 44 #define warn_large_before warnf( !strict || old_avg < 35.0, "Suspiciously large previous average: %'lf, %'" PRId64 "ms \n", old_avg, program()`ms ) 45 #define warn_large_after warnf( !strict || ret < 35.3, "Suspiciously large new average after %'" PRId64 "ms cputime: %'lf from %'llu-%'llu (%'llu, %'llu) and %'lf\n", program()`ms, ret, currtsc, intsc, new_val, new_val / 1000000, old_avg ); \ 46 verify(ret >= 0) 47 48 // 8X factor in logscale is log2(8X) = log2(8) + log2(X) = 3 + log2(X) 49 #define AVG_FACTOR( x ) (3.0 + (x)) 50 51 // we need to overload the __atomic_load_n because they don't support double 52 static inline double __atomic_load_n(volatile double * ptr, int mem) { 53 volatile uint64_t * uptr = (volatile uint64_t *)ptr; 54 _Static_assert(sizeof(*uptr) == sizeof(*ptr)); 55 uint64_t ret = 0; 56 ret = __atomic_load_n(uptr, mem); 57 uint64_t *rp = &ret; 58 double ret = *(volatile double *)rp; 59 /* paranoid */ verify( ret == 0 || ret > 3e-100 ); 60 return ret; 61 } 62 63 // we need to overload the __atomic_store_n because they don't support double 64 static inline void __atomic_store_n(volatile double * ptr, double val, int mem) { 65 /* paranoid */ verify( val == 0 || val > 3e-100 ); 66 volatile uint64_t * uptr = (volatile uint64_t *)ptr; 67 _Static_assert(sizeof(*uptr) == sizeof(*ptr)); 68 uint64_t * valp = (uint64_t *)&val; 69 __atomic_store_n(uptr, *valp, mem); 70 } 71 72 #elif defined(READYQ_USE_LOGDBL_AVG) 73 74 //convert to log2 scale but with fix point u32.32 values 75 static inline __readyQ_avg_t __to_readyQ_avg(unsigned long long intsc) { return ulog2_32_32(tsc); } 76 77 // 8X factor, +3 in logscale (see above) is + 0x3.00000000 78 #define AVG_FACTOR( x ) (0x3_00000000ull + (x)) 79 80 #else 81 #error must pick a scheme for averaging 82 #endif 22 83 23 84 //----------------------------------------------------------------------- 24 85 // Calc moving average based on existing average, before and current time. 
25 static inline unsigned long long moving_average(unsigned long long currtsc, unsigned long long instsc, unsigned long long old_avg) { 26 /* paranoid */ verifyf( old_avg < 15000000000000, "Suspiciously large previous average: %'llu (%llx)\n", old_avg, old_avg ); 86 static inline __readyQ_avg_t moving_average(unsigned long long currtsc, unsigned long long intsc, __readyQ_avg_t old_avg, bool strict) { 87 (void)strict; // disable the warning around the fact this is unused in release. 88 /* paranoid */ warn_large_before; 27 89 28 const unsigned long long new_val = currtsc > instsc ? currtsc - instsc : 0; 29 const unsigned long long total_weight = 16; 30 const unsigned long long new_weight = 4; 31 const unsigned long long old_weight = total_weight - new_weight; 32 const unsigned long long ret = ((new_weight * new_val) + (old_weight * old_avg)) / total_weight; 90 const unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0; 91 const __readyQ_avg_t total_weight = 16; 92 const __readyQ_avg_t new_weight = 12; 93 const __readyQ_avg_t old_weight = total_weight - new_weight; 94 const __readyQ_avg_t ret = ((new_weight * __to_readyQ_avg(new_val)) + (old_weight * old_avg)) / total_weight; 95 96 /* paranoid */ warn_large_after; 33 97 return ret; 34 98 } 35 99 36 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next ) {100 static inline void touch_tsc(__timestamp_t * tscs, size_t idx, unsigned long long ts_prev, unsigned long long ts_next, bool strict) { 37 101 if (ts_next == ULLONG_MAX) return; 38 102 unsigned long long now = rdtscl(); 39 unsigned long longpma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED);103 __readyQ_avg_t pma = __atomic_load_n(&tscs[ idx ].t.ma, __ATOMIC_RELAXED); 40 104 __atomic_store_n(&tscs[ idx ].t.tv, ts_next, __ATOMIC_RELAXED); 41 __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma ), __ATOMIC_RELAXED);105 __atomic_store_n(&tscs[ idx ].t.ma, moving_average(now, ts_prev, pma, strict), __ATOMIC_RELAXED); 42 106 } 43 107 … … 45 109 // Calc age a timestamp should be before needing help. 46 110 forall(Data_t * | { unsigned long long ts(Data_t & this); }) 47 static inline unsigned long longcalc_cutoff(111 static inline __readyQ_avg_t calc_cutoff( 48 112 const unsigned long long ctsc, 49 113 unsigned procid, … … 51 115 Data_t * data, 52 116 __timestamp_t * tscs, 53 const unsigned shard_factor 117 const unsigned shard_factor, 118 bool strict 54 119 ) { 55 120 unsigned start = procid; 56 unsigned long longmax = 0;121 __readyQ_avg_t max = 0; 57 122 for(i; shard_factor) { 58 123 unsigned long long ptsc = ts(data[start + i]); 59 124 if(ptsc != ULLONG_MAX) { 60 125 /* paranoid */ verify( start + i < count ); 61 unsigned long long tsc = moving_average(ctsc, ptsc, tscs[start + i].t.ma);62 if( tsc > max) max = tsc;126 __readyQ_avg_t avg = moving_average(ctsc, ptsc, tscs[start + i].t.ma, strict); 127 if(avg > max) max = avg; 63 128 } 64 129 } 65 return 8 * max;130 return AVG_FACTOR( max ); 66 131 } 67 132 -
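Note: the kernel/cluster.hfa changes above switch the ready-queue/io moving average from raw tsc units to a log2 scale stored as a double (READYQ_USE_LOGDBL_AVG), and add __atomic_load_n/__atomic_store_n overloads that bit-cast the double through a 64-bit integer. A minimal C sketch of those two ideas, with illustrative helper names that are not the libcfa API:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Weighted moving average kept in log2 scale (log2(0) mapped to 0).
   The 12/16 vs 4/16 weights follow the new values in the hunk. */
static double moving_average_log2( unsigned long long currtsc, unsigned long long intsc, double old_avg ) {
	unsigned long long new_val = currtsc > intsc ? currtsc - intsc : 0;
	double new_log = new_val == 0 ? 0.0 : log2( (double)new_val );
	const double total_weight = 16.0, new_weight = 12.0, old_weight = total_weight - new_weight;
	return (new_weight * new_log + old_weight * old_avg) / total_weight;
}

/* Atomic load/store of a double by punning through a 64-bit integer,
   mirroring the overloads added in kernel/cluster.hfa (memcpy used here for the bit-cast). */
static double atomic_load_double( volatile double * p ) {
	uint64_t bits = __atomic_load_n( (volatile uint64_t *)p, __ATOMIC_RELAXED );
	double v;
	memcpy( &v, &bits, sizeof(v) );
	return v;
}

static void atomic_store_double( volatile double * p, double v ) {
	uint64_t bits;
	memcpy( &bits, &v, sizeof(bits) );
	__atomic_store_n( (volatile uint64_t *)p, bits, __ATOMIC_RELAXED );
}

int main( void ) {
	volatile double ma = 0.0;
	atomic_store_double( &ma, moving_average_log2( 1000, 200, atomic_load_double( &ma ) ) );
	printf( "moving average (log2 scale): %g\n", atomic_load_double( &ma ) );	// 12/16 * log2(800)
	return 0;
}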
libcfa/src/concurrency/kernel/fwd.hfa
r9cd5bd2	rdf6cc9d
 276 276	// intented to be use by wait, wait_any, waitfor, etc. rather than used directly
 277 277	bool retract( future_t & this, oneshot & wait_ctx ) {
 278     		struct oneshot * expected = this.ptr;
     278		struct oneshot * expected = &wait_ctx;
 279 279	
 280 280		// attempt to remove the context so it doesn't get consumed.
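Note: the one-line fix to retract changes the expected value from a fresh read of this.ptr to the waiter's own context, so the waiter only unlinks itself if the future still holds that exact context. The removal operation that follows is not shown in the hunk; the C sketch below uses simplified stand-in types and assumes a CAS-based removal purely to illustrate why expected must be &wait_ctx:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins: a future is either empty (0), holds a waiter's oneshot context,
   or has been fulfilled; only the first two states matter for retract. */
struct oneshot { int dummy; };
struct future { struct oneshot * volatile ptr; };

/* retract: the waiter removes *its own* context. expected must be &wait_ctx (the fix in this
   changeset); re-reading this->ptr as the expected value could race with the producer and
   remove someone else's state. CAS-based removal is assumed here. */
static bool retract( struct future * this, struct oneshot * wait_ctx ) {
	struct oneshot * expected = wait_ctx;
	return __atomic_compare_exchange_n( &this->ptr, &expected, (struct oneshot *)0,
	                                    false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
}

int main( void ) {
	struct oneshot me;
	struct future f = { &me };		// future currently holds this waiter's context
	printf( "%d\n", retract( &f, &me ) );	// 1: waiter removed itself before fulfilment
	printf( "%d\n", retract( &f, &me ) );	// 0: already empty, nothing to retract
	return 0;
}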
libcfa/src/concurrency/kernel/private.hfa
r9cd5bd2 rdf6cc9d 50 50 #endif 51 51 #endif 52 // #define READYQ_USE_LINEAR_AVG 53 #define READYQ_USE_LOGDBL_AVG 54 // #define READYQ_USE_LOGINT_AVG 55 56 #if defined(READYQ_USE_LINEAR_AVG) 57 typedef unsigned long long __readyQ_avg_t; 58 #elif defined(READYQ_USE_LOGDBL_AVG) 59 typedef double __readyQ_avg_t; 60 #elif defined(READYQ_USE_LOGDBL_AVG) 61 typedef unsigned long long __readyQ_avg_t; 62 #else 63 #error must pick a scheme for averaging 64 #endif 52 65 53 66 extern "C" { … … 65 78 //----------------------------------------------------------------------------- 66 79 // Scheduler 80 union __attribute__((aligned(64))) __timestamp_t { 81 struct { 82 volatile unsigned long long tv; 83 volatile __readyQ_avg_t ma; 84 } t; 85 char __padding[192]; 86 }; 87 67 88 extern "C" { 68 89 void disable_interrupts() OPTIONAL_THREAD; -
libcfa/src/concurrency/kernel/startup.cfa
r9cd5bd2 rdf6cc9d 184 184 185 185 186 extern void heapManagerCtor(); 187 extern void heapManagerDtor(); 188 186 189 //============================================================================================= 187 190 // Kernel Setup logic … … 374 377 proc->local_data = &__cfaabi_tls; 375 378 379 heapManagerCtor(); // initialize heap 380 376 381 __cfa_io_start( proc ); 377 382 register_tls( proc ); … … 425 430 unregister_tls( proc ); 426 431 __cfa_io_stop( proc ); 432 433 heapManagerDtor(); // de-initialize heap 427 434 428 435 return 0p; -
libcfa/src/concurrency/preemption.cfa
r9cd5bd2 rdf6cc9d 104 104 static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) { 105 105 if( ! & (*alarms)`first ) return 0p; // If no alarms return null 106 if( (*alarms)`first. timeval>= currtime ) return 0p; // If alarms head not expired return null106 if( (*alarms)`first.deadline >= currtime ) return 0p; // If alarms head not expired return null 107 107 return pop(alarms); // Otherwise just pop head 108 108 } … … 140 140 if( period > 0 ) { 141 141 __cfadbg_print_buffer_local( preemption, " KERNEL: alarm period is %lu.\n", period`ns ); 142 node-> timeval= currtime + period; // Alarm is periodic, add currtime to it (used cached current time)142 node->deadline = currtime + period; // Alarm is periodic, add currtime to it (used cached current time) 143 143 insert( alarms, node ); // Reinsert the node for the next time it triggers 144 144 } … … 147 147 // If there are still alarms pending, reset the timer 148 148 if( & (*alarms)`first ) { 149 Duration delta = (*alarms)`first. timeval- currtime;149 Duration delta = (*alarms)`first.deadline - currtime; 150 150 __kernel_set_timer( delta ); 151 151 } … … 232 232 // available. 233 233 234 //-----------------------------------------------------------------------------235 // Some assembly required236 #define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)237 238 234 //---------- 239 235 // special case for preemption since used often 240 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_nopreempt libcfa_public { 241 // create a assembler label before 242 // marked as clobber all to avoid movement 243 __cfaasm_label(check, before); 244 236 bool __preemption_enabled() libcfa_nopreempt libcfa_public { 245 237 // access tls as normal 246 bool enabled = __cfaabi_tls.preemption_state.enabled; 247 248 // Check if there is a pending preemption 249 processor * proc = __cfaabi_tls.this_processor; 250 bool pending = proc ? 
proc->pending_preemption : false; 251 if( enabled && pending ) proc->pending_preemption = false; 252 253 // create a assembler label after 254 // marked as clobber all to avoid movement 255 __cfaasm_label(check, after); 256 257 // If we can preempt and there is a pending one 258 // this is a good time to yield 259 if( enabled && pending ) { 260 force_yield( __POLL_PREEMPTION ); 261 } 262 return enabled; 263 } 264 265 struct asm_region { 266 void * before; 267 void * after; 268 }; 269 270 static inline bool __cfaasm_in( void * ip, struct asm_region & region ) { 271 return ip >= region.before && ip <= region.after; 238 return __cfaabi_tls.preemption_state.enabled; 272 239 } 273 240 … … 293 260 uintptr_t __cfatls_get( unsigned long int offset ) libcfa_nopreempt libcfa_public; //no inline to avoid problems 294 261 uintptr_t __cfatls_get( unsigned long int offset ) { 295 // create a assembler label before296 // marked as clobber all to avoid movement297 __cfaasm_label(get, before);298 299 262 // access tls as normal (except for pointer arithmetic) 300 263 uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset); 301 264 302 // create a assembler label after303 // marked as clobber all to avoid movement304 __cfaasm_label(get, after);305 306 265 // This is used everywhere, to avoid cost, we DO NOT poll pending preemption 307 266 return val; … … 310 269 extern "C" { 311 270 // Disable interrupts by incrementing the counter 312 void disable_interrupts() libcfa_nopreempt libcfa_public { 313 // create a assembler label before 314 // marked as clobber all to avoid movement 315 __cfaasm_label(dsable, before); 316 317 with( __cfaabi_tls.preemption_state ) { 318 #if GCC_VERSION > 50000 319 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free"); 320 #endif 321 322 // Set enabled flag to false 323 // should be atomic to avoid preemption in the middle of the operation. 324 // use memory order RELAXED since there is no inter-thread on this variable requirements 325 __atomic_store_n(&enabled, false, __ATOMIC_RELAXED); 326 327 // Signal the compiler that a fence is needed but only for signal handlers 328 __atomic_signal_fence(__ATOMIC_ACQUIRE); 329 330 __attribute__((unused)) unsigned short new_val = disable_count + 1; 331 disable_count = new_val; 332 verify( new_val < 65_000u ); // If this triggers someone is disabling interrupts without enabling them 333 } 334 335 // create a assembler label after 336 // marked as clobber all to avoid movement 337 __cfaasm_label(dsable, after); 338 271 void disable_interrupts() libcfa_nopreempt libcfa_public with( __cfaabi_tls.preemption_state ) { 272 #if GCC_VERSION > 50000 273 static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free"); 274 #endif 275 276 // Set enabled flag to false 277 // should be atomic to avoid preemption in the middle of the operation. 278 // use memory order RELAXED since there is no inter-thread on this variable requirements 279 __atomic_store_n(&enabled, false, __ATOMIC_RELAXED); 280 281 // Signal the compiler that a fence is needed but only for signal handlers 282 __atomic_signal_fence(__ATOMIC_ACQUIRE); 283 284 __attribute__((unused)) unsigned short new_val = disable_count + 1; 285 disable_count = new_val; 286 verify( new_val < 65_000u ); // If this triggers someone is disabling interrupts without enabling them 339 287 } 340 288 … … 379 327 // i.e. 
on a real processor and not in the kernel 380 328 // (can return true even if no preemption was pending) 381 bool poll_interrupts() libcfa_ public {329 bool poll_interrupts() libcfa_nopreempt libcfa_public { 382 330 // Cache the processor now since interrupts can start happening after the atomic store 383 processor * proc = publicTLS_get( this_processor );331 processor * proc = __cfaabi_tls.this_processor; 384 332 if ( ! proc ) return false; 385 if ( ! __preemption_enabled() ) return false; 386 387 with( __cfaabi_tls.preemption_state ){ 388 // Signal the compiler that a fence is needed but only for signal handlers 389 __atomic_signal_fence(__ATOMIC_RELEASE); 390 if( proc->pending_preemption ) { 391 proc->pending_preemption = false; 392 force_yield( __POLL_PREEMPTION ); 393 } 333 if ( ! __cfaabi_tls.preemption_state.enabled ) return false; 334 335 // Signal the compiler that a fence is needed but only for signal handlers 336 __atomic_signal_fence(__ATOMIC_RELEASE); 337 if( unlikely( proc->pending_preemption ) ) { 338 proc->pending_preemption = false; 339 force_yield( __POLL_PREEMPTION ); 394 340 } 395 341 -
libcfa/src/concurrency/ready_queue.cfa
r9cd5bd2 rdf6cc9d 62 62 //----------------------------------------------------------------------- 63 63 __attribute__((hot)) void push(struct cluster * cltr, struct thread$ * thrd, unpark_hint hint) with (cltr->sched) { 64 processor * const proc = kernelTLS().this_processor;64 struct processor * const proc = kernelTLS().this_processor; 65 65 const bool external = (!proc) || (cltr != proc->cltr); 66 66 const bool remote = hint == UNPARK_REMOTE; … … 116 116 /* paranoid */ verify( kernelTLS().this_processor->rdq.id < lanes_count ); 117 117 118 processor * const proc = kernelTLS().this_processor;118 struct processor * const proc = kernelTLS().this_processor; 119 119 unsigned this = proc->rdq.id; 120 120 /* paranoid */ verify( this < lanes_count ); … … 139 139 /* paranoid */ verify( readyQ.tscs[target].t.tv != ULLONG_MAX ); 140 140 if(target < lanes_count) { 141 const unsigned long long cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq);142 const unsigned long long age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma);141 const __readyQ_avg_t cutoff = calc_cutoff(ctsc, proc->rdq.id, lanes_count, cltr->sched.readyQ.data, cltr->sched.readyQ.tscs, __shard_factor.readyq, true); 142 const __readyQ_avg_t age = moving_average(ctsc, readyQ.tscs[target].t.tv, readyQ.tscs[target].t.ma, false); 143 143 __cfadbg_print_safe(ready_queue, "Kernel : Help attempt on %u from %u, age %'llu vs cutoff %'llu, %s\n", target, this, age, cutoff, age > cutoff ? "yes" : "no"); 144 144 if(age > cutoff) { … … 214 214 __STATS( stats.success++; ) 215 215 216 touch_tsc(readyQ.tscs, w, ts_prev, ts_next );216 touch_tsc(readyQ.tscs, w, ts_prev, ts_next, true); 217 217 218 218 thrd->preferred = w / __shard_factor.readyq; -
libcfa/src/containers/array.hfa
r9cd5bd2 rdf6cc9d 27 27 // - Given bug of Trac #247, CFA gives sizeof expressions type unsigned long int, when it 28 28 // should give them type size_t. 29 // 29 // 30 30 // gcc -m32 cfa -m32 given bug gcc -m64 31 31 // ptrdiff_t int int long int … … 39 39 } 40 40 41 static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, int i ) { 42 assert( i < N ); 43 return (Timmed &) a.strides[i]; 44 } 45 41 46 static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, unsigned int i ) { 47 assert( i < N ); 48 return (Timmed &) a.strides[i]; 49 } 50 51 static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, unsigned int i ) { 42 52 assert( i < N ); 43 53 return (Timmed &) a.strides[i]; … … 49 59 } 50 60 61 static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, long int i ) { 62 assert( i < N ); 63 return (Timmed &) a.strides[i]; 64 } 65 51 66 static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, unsigned long int i ) { 67 assert( i < N ); 68 return (Timmed &) a.strides[i]; 69 } 70 71 static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, unsigned long int i ) { 52 72 assert( i < N ); 53 73 return (Timmed &) a.strides[i]; … … 83 103 // Make a FOREACH macro 84 104 #define FE_0(WHAT) 85 #define FE_1(WHAT, X) WHAT(X) 105 #define FE_1(WHAT, X) WHAT(X) 86 106 #define FE_2(WHAT, X, ...) WHAT(X)FE_1(WHAT, __VA_ARGS__) 87 107 #define FE_3(WHAT, X, ...) WHAT(X)FE_2(WHAT, __VA_ARGS__) … … 90 110 //... repeat as needed 91 111 92 #define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME 112 #define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME 93 113 #define FOR_EACH(action,...) \ 94 114 GET_MACRO(_0,__VA_ARGS__,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)(action,__VA_ARGS__) … … 115 135 } 116 136 117 #else 137 #else 118 138 119 139 // Workaround form. Listing all possibilities up to 4 dims. -
libcfa/src/device/cpu.cfa
r9cd5bd2 rdf6cc9d 359 359 int idxs = count_cache_indexes(); 360 360 361 // Do we actually have a cache? 362 if(idxs == 0) { 363 // if not just fake the data structure, it makes things easier. 364 cpu_info.hthrd_count = cpus_c; 365 cpu_info.llc_count = 0; 366 struct cpu_map_entry_t * entries = alloc(cpu_info.hthrd_count); 367 for(i; cpu_info.hthrd_count) { 368 entries[i].self = i; 369 entries[i].start = 0; 370 entries[i].count = cpu_info.hthrd_count; 371 entries[i].cache = 0; 372 } 373 cpu_info.llc_map = entries; 374 return; 375 } 376 361 377 // Count actual cache levels 362 378 unsigned cache_levels = 0; 363 379 unsigned llc = 0; 364 if (idxs != 0) { 365 unsigned char prev = -1u; 366 void first(unsigned idx, unsigned char level, const char * map, size_t len) { 367 /* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level); 368 llc = max(llc, level); 369 prev = level; 370 cache_levels++; 371 } 372 foreach_cacheidx(0, idxs, first); 373 } 380 381 unsigned char prev = -1u; 382 void first(unsigned idx, unsigned char level, const char * map, size_t len) { 383 /* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level); 384 llc = max(llc, level); 385 prev = level; 386 cache_levels++; 387 } 388 foreach_cacheidx(0, idxs, first); 374 389 375 390 // Read in raw data -
libcfa/src/heap.cfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Apr 29 19:05:03202213 // Update Count : 1 16712 // Last Modified On : Thu Oct 13 22:21:52 2022 13 // Update Count : 1557 14 14 // 15 15 16 #include <stdio.h> 16 17 #include <string.h> // memset, memcpy 17 18 #include <limits.h> // ULONG_MAX … … 21 22 #include <malloc.h> // memalign, malloc_usable_size 22 23 #include <sys/mman.h> // mmap, munmap 24 extern "C" { 23 25 #include <sys/sysinfo.h> // get_nprocs 26 } // extern "C" 24 27 25 28 #include "bits/align.hfa" // libAlign 26 29 #include "bits/defs.hfa" // likely, unlikely 27 #include " bits/locks.hfa" // __spinlock_t30 #include "concurrency/kernel/fwd.hfa" // __POLL_PREEMPTION 28 31 #include "startup.hfa" // STARTUP_PRIORITY_MEMORY 29 #include "math.hfa" // min32 #include "math.hfa" // ceiling, min 30 33 #include "bitmanip.hfa" // is_pow2, ceiling2 31 34 32 #define FASTLOOKUP 33 #define __STATISTICS__ 35 // supported mallopt options 36 #ifndef M_MMAP_THRESHOLD 37 #define M_MMAP_THRESHOLD (-1) 38 #endif // M_MMAP_THRESHOLD 39 40 #ifndef M_TOP_PAD 41 #define M_TOP_PAD (-2) 42 #endif // M_TOP_PAD 43 44 #define FASTLOOKUP // use O(1) table lookup from allocation size to bucket size 45 #define RETURNSPIN // toggle spinlock / lockfree stack 46 #define OWNERSHIP // return freed memory to owner thread 47 48 #define CACHE_ALIGN 64 49 #define CALIGN __attribute__(( aligned(CACHE_ALIGN) )) 50 51 #define TLSMODEL __attribute__(( tls_model("initial-exec") )) 52 53 //#define __STATISTICS__ 54 55 enum { 56 // The default extension heap amount in units of bytes. When the current heap reaches the brk address, the brk 57 // address is extended by the extension amount. 58 __CFA_DEFAULT_HEAP_EXPANSION__ = 10 * 1024 * 1024, 59 60 // The mmap crossover point during allocation. Allocations less than this amount are allocated from buckets; values 61 // greater than or equal to this value are mmap from the operating system. 62 __CFA_DEFAULT_MMAP_START__ = 512 * 1024 + 1, 63 64 // The default unfreed storage amount in units of bytes. When the uC++ program ends it subtracts this amount from 65 // the malloc/free counter to adjust for storage the program does not free. 66 __CFA_DEFAULT_HEAP_UNFREED__ = 0 67 }; // enum 68 69 70 //####################### Heap Trace/Print #################### 34 71 35 72 … … 55 92 static bool prtFree = false; 56 93 57 staticbool prtFree() {94 bool prtFree() { 58 95 return prtFree; 59 96 } // prtFree 60 97 61 staticbool prtFreeOn() {98 bool prtFreeOn() { 62 99 bool temp = prtFree; 63 100 prtFree = true; … … 65 102 } // prtFreeOn 66 103 67 staticbool prtFreeOff() {104 bool prtFreeOff() { 68 105 bool temp = prtFree; 69 106 prtFree = false; … … 72 109 73 110 74 enum { 75 // The default extension heap amount in units of bytes. When the current heap reaches the brk address, the brk 76 // address is extended by the extension amount. 77 __CFA_DEFAULT_HEAP_EXPANSION__ = 10 * 1024 * 1024, 78 79 // The mmap crossover point during allocation. Allocations less than this amount are allocated from buckets; values 80 // greater than or equal to this value are mmap from the operating system. 81 __CFA_DEFAULT_MMAP_START__ = 512 * 1024 + 1, 82 83 // The default unfreed storage amount in units of bytes. When the uC++ program ends it subtracts this amount from 84 // the malloc/free counter to adjust for storage the program does not free. 
85 __CFA_DEFAULT_HEAP_UNFREED__ = 0 86 }; // enum 111 //######################### Spin Lock ######################### 112 113 114 // pause to prevent excess processor bus usage 115 #if defined( __i386 ) || defined( __x86_64 ) 116 #define Pause() __asm__ __volatile__ ( "pause" : : : ) 117 #elif defined(__ARM_ARCH) 118 #define Pause() __asm__ __volatile__ ( "YIELD" : : : ) 119 #else 120 #error unsupported architecture 121 #endif 122 123 typedef volatile uintptr_t SpinLock_t CALIGN; // aligned addressable word-size 124 125 static inline __attribute__((always_inline)) void lock( volatile SpinLock_t & slock ) { 126 enum { SPIN_START = 4, SPIN_END = 64 * 1024, }; 127 unsigned int spin = SPIN_START; 128 129 for ( unsigned int i = 1;; i += 1 ) { 130 if ( slock == 0 && __atomic_test_and_set( &slock, __ATOMIC_SEQ_CST ) == 0 ) break; // Fence 131 for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin 132 spin += spin; // powers of 2 133 //if ( i % 64 == 0 ) spin += spin; // slowly increase by powers of 2 134 if ( spin > SPIN_END ) spin = SPIN_END; // cap spinning 135 } // for 136 } // spin_lock 137 138 static inline __attribute__((always_inline)) void unlock( volatile SpinLock_t & slock ) { 139 __atomic_clear( &slock, __ATOMIC_SEQ_CST ); // Fence 140 } // spin_unlock 87 141 88 142 … … 120 174 unsigned int free_calls, free_null_calls; 121 175 unsigned long long int free_storage_request, free_storage_alloc; 122 unsigned int away_pulls, away_pushes;123 unsigned long long int away_storage_request, away_storage_alloc;176 unsigned int return_pulls, return_pushes; 177 unsigned long long int return_storage_request, return_storage_alloc; 124 178 unsigned int mmap_calls, mmap_0_calls; // no zero calls 125 179 unsigned long long int mmap_storage_request, mmap_storage_alloc; … … 131 185 132 186 static_assert( sizeof(HeapStatistics) == CntTriples * sizeof(StatsOverlay), 133 187 "Heap statistics counter-triplets does not match with array size" ); 134 188 135 189 static void HeapStatisticsCtor( HeapStatistics & stats ) { … … 203 257 static_assert( libAlign() >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" ); 204 258 205 struct FreeHeader {206 size_t blockSize __attribute__(( aligned 259 struct __attribute__(( aligned (8) )) FreeHeader { 260 size_t blockSize __attribute__(( aligned(8) )); // size of allocations on this list 207 261 #if BUCKETLOCK == SPINLOCK 208 __spinlock_t lock; 209 Storage * freeList; 262 #ifdef OWNERSHIP 263 #ifdef RETURNSPIN 264 SpinLock_t returnLock; 265 #endif // RETURNSPIN 266 Storage * returnList; // other thread return list 267 #endif // OWNERSHIP 268 Storage * freeList; // thread free list 210 269 #else 211 270 StackLF(Storage) freeList; 212 271 #endif // BUCKETLOCK 213 } __attribute__(( aligned (8) )); // FreeHeader 272 Heap * homeManager; // heap owner (free storage to bucket, from bucket to heap) 273 }; // FreeHeader 214 274 215 275 FreeHeader freeLists[NoBucketSizes]; // buckets for different allocation sizes 216 217 __spinlock_t extlock; // protects allocation-buffer extension 218 void * heapBegin; // start of heap 219 void * heapEnd; // logical end of heap 220 size_t heapRemaining; // amount of storage not allocated in the current chunk 276 void * heapBuffer; // start of free storage in buffer 277 size_t heapReserve; // amount of remaining free storage in buffer 278 279 #if defined( __STATISTICS__ ) || defined( __CFA_DEBUG__ ) 280 Heap * nextHeapManager; // intrusive link of existing heaps; traversed to collect statistics or check unfreed 
storage 281 #endif // __STATISTICS__ || __CFA_DEBUG__ 282 Heap * nextFreeHeapManager; // intrusive link of free heaps from terminated threads; reused by new threads 283 284 #ifdef __CFA_DEBUG__ 285 int64_t allocUnfreed; // running total of allocations minus frees; can be negative 286 #endif // __CFA_DEBUG__ 287 288 #ifdef __STATISTICS__ 289 HeapStatistics stats; // local statistic table for this heap 290 #endif // __STATISTICS__ 221 291 }; // Heap 222 292 223 293 #if BUCKETLOCK == LOCKFREE 224 static inline { 294 inline __attribute__((always_inline)) 295 static { 225 296 Link(Heap.Storage) * ?`next( Heap.Storage * this ) { return &this->header.kind.real.next; } 226 297 void ?{}( Heap.FreeHeader & ) {} … … 229 300 #endif // LOCKFREE 230 301 231 static inline size_t getKey( const Heap.FreeHeader & freeheader ) { return freeheader.blockSize; } 302 303 struct HeapMaster { 304 SpinLock_t extLock; // protects allocation-buffer extension 305 SpinLock_t mgrLock; // protects freeHeapManagersList, heapManagersList, heapManagersStorage, heapManagersStorageEnd 306 307 void * heapBegin; // start of heap 308 void * heapEnd; // logical end of heap 309 size_t heapRemaining; // amount of storage not allocated in the current chunk 310 size_t pageSize; // architecture pagesize 311 size_t heapExpand; // sbrk advance 312 size_t mmapStart; // cross over point for mmap 313 unsigned int maxBucketsUsed; // maximum number of buckets in use 314 315 Heap * heapManagersList; // heap-list head 316 Heap * freeHeapManagersList; // free-list head 317 318 // Heap superblocks are not linked; heaps in superblocks are linked via intrusive links. 319 Heap * heapManagersStorage; // next heap to use in heap superblock 320 Heap * heapManagersStorageEnd; // logical heap outside of superblock's end 321 322 #ifdef __STATISTICS__ 323 HeapStatistics stats; // global stats for thread-local heaps to add there counters when exiting 324 unsigned long int threads_started, threads_exited; // counts threads that have started and exited 325 unsigned long int reused_heap, new_heap; // counts reusability of heaps 326 unsigned int sbrk_calls; 327 unsigned long long int sbrk_storage; 328 int stats_fd; 329 #endif // __STATISTICS__ 330 }; // HeapMaster 232 331 233 332 234 333 #ifdef FASTLOOKUP 235 enum { LookupSizes = 65_536 + sizeof(Heap.Storage) }; 334 enum { LookupSizes = 65_536 + sizeof(Heap.Storage) }; // number of fast lookup sizes 236 335 static unsigned char lookup[LookupSizes]; // O(1) lookup for small sizes 237 336 #endif // FASTLOOKUP 238 337 239 static const off_t mmapFd = -1; // fake or actual fd for anonymous file 240 #ifdef __CFA_DEBUG__ 241 static bool heapBoot = 0; // detect recursion during boot 242 #endif // __CFA_DEBUG__ 338 static volatile bool heapMasterBootFlag = false; // trigger for first heap 339 static HeapMaster heapMaster @= {}; // program global 340 341 static void heapMasterCtor(); 342 static void heapMasterDtor(); 343 static Heap * getHeap(); 243 344 244 345 … … 268 369 static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" ); 269 370 270 // The constructor for heapManager is called explicitly in memory_startup. 
271 static Heap heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing 371 372 // extern visibility, used by runtime kernel 373 libcfa_public size_t __page_size; // architecture pagesize 374 libcfa_public int __map_prot; // common mmap/mprotect protection 375 376 377 // Thread-local storage is allocated lazily when the storage is accessed. 378 static __thread size_t PAD1 CALIGN TLSMODEL __attribute__(( unused )); // protect false sharing 379 static __thread Heap * volatile heapManager CALIGN TLSMODEL; 380 static __thread size_t PAD2 CALIGN TLSMODEL __attribute__(( unused )); // protect further false sharing 381 382 383 // declare helper functions for HeapMaster 384 void noMemory(); // forward, called by "builtin_new" when malloc returns 0 385 386 387 // generic Bsearchl does not inline, so substitute with hand-coded binary-search. 388 inline __attribute__((always_inline)) 389 static size_t Bsearchl( unsigned int key, const unsigned int vals[], size_t dim ) { 390 size_t l = 0, m, h = dim; 391 while ( l < h ) { 392 m = (l + h) / 2; 393 if ( (unsigned int &)(vals[m]) < key ) { // cast away const 394 l = m + 1; 395 } else { 396 h = m; 397 } // if 398 } // while 399 return l; 400 } // Bsearchl 401 402 403 void heapMasterCtor() with( heapMaster ) { 404 // Singleton pattern to initialize heap master 405 406 verify( bucketSizes[0] == (16 + sizeof(Heap.Storage)) ); 407 408 __page_size = sysconf( _SC_PAGESIZE ); 409 __map_prot = PROT_READ | PROT_WRITE | PROT_EXEC; 410 411 ?{}( extLock ); 412 ?{}( mgrLock ); 413 414 char * end = (char *)sbrk( 0 ); 415 heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment 416 heapRemaining = 0; 417 heapExpand = malloc_expansion(); 418 mmapStart = malloc_mmap_start(); 419 420 // find the closest bucket size less than or equal to the mmapStart size 421 maxBucketsUsed = Bsearchl( mmapStart, bucketSizes, NoBucketSizes ); // binary search 422 423 verify( (mmapStart >= pageSize) && (bucketSizes[NoBucketSizes - 1] >= mmapStart) ); 424 verify( maxBucketsUsed < NoBucketSizes ); // subscript failure ? 425 verify( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ? 426 427 heapManagersList = 0p; 428 freeHeapManagersList = 0p; 429 430 heapManagersStorage = 0p; 431 heapManagersStorageEnd = 0p; 432 433 #ifdef __STATISTICS__ 434 HeapStatisticsCtor( stats ); // clear statistic counters 435 threads_started = threads_exited = 0; 436 reused_heap = new_heap = 0; 437 sbrk_calls = sbrk_storage = 0; 438 stats_fd = STDERR_FILENO; 439 #endif // __STATISTICS__ 440 441 #ifdef FASTLOOKUP 442 for ( unsigned int i = 0, idx = 0; i < LookupSizes; i += 1 ) { 443 if ( i > bucketSizes[idx] ) idx += 1; 444 lookup[i] = idx; 445 verify( i <= bucketSizes[idx] ); 446 verify( (i <= 32 && idx == 0) || (i > bucketSizes[idx - 1]) ); 447 } // for 448 #endif // FASTLOOKUP 449 450 heapMasterBootFlag = true; 451 } // heapMasterCtor 452 453 454 #define NO_MEMORY_MSG "**** Error **** insufficient heap memory available to allocate %zd new bytes." 455 456 Heap * getHeap() with( heapMaster ) { 457 Heap * heap; 458 if ( freeHeapManagersList ) { // free heap for reused ? 
459 heap = freeHeapManagersList; 460 freeHeapManagersList = heap->nextFreeHeapManager; 461 462 #ifdef __STATISTICS__ 463 reused_heap += 1; 464 #endif // __STATISTICS__ 465 } else { // free heap not found, create new 466 // Heap size is about 12K, FreeHeader (128 bytes because of cache alignment) * NoBucketSizes (91) => 128 heaps * 467 // 12K ~= 120K byte superblock. Where 128-heap superblock handles a medium sized multi-processor server. 468 size_t remaining = heapManagersStorageEnd - heapManagersStorage; // remaining free heaps in superblock 469 if ( ! heapManagersStorage || remaining != 0 ) { 470 // Each block of heaps is a multiple of the number of cores on the computer. 471 int HeapDim = get_nprocs(); // get_nprocs_conf does not work 472 size_t size = HeapDim * sizeof( Heap ); 473 474 heapManagersStorage = (Heap *)mmap( 0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 ); 475 if ( unlikely( heapManagersStorage == (Heap *)MAP_FAILED ) ) { // failed ? 476 if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, size ); // no memory 477 // Do not call strerror( errno ) as it may call malloc. 478 abort( "**** Error **** attempt to allocate block of heaps of size %zu bytes and mmap failed with errno %d.", size, errno ); 479 } // if 480 heapManagersStorageEnd = &heapManagersStorage[HeapDim]; // outside array 481 } // if 482 483 heap = heapManagersStorage; 484 heapManagersStorage = heapManagersStorage + 1; // bump next heap 485 486 #if defined( __STATISTICS__ ) || defined( __CFA_DEBUG__ ) 487 heap->nextHeapManager = heapManagersList; 488 #endif // __STATISTICS__ || __CFA_DEBUG__ 489 heapManagersList = heap; 490 491 #ifdef __STATISTICS__ 492 new_heap += 1; 493 #endif // __STATISTICS__ 494 495 with( *heap ) { 496 for ( unsigned int j = 0; j < NoBucketSizes; j += 1 ) { // initialize free lists 497 #ifdef OWNERSHIP 498 #ifdef RETURNSPIN 499 ?{}( freeLists[j].returnLock ); 500 #endif // RETURNSPIN 501 freeLists[j].returnList = 0p; 502 #endif // OWNERSHIP 503 freeLists[j].freeList = 0p; 504 freeLists[j].homeManager = heap; 505 freeLists[j].blockSize = bucketSizes[j]; 506 } // for 507 508 heapBuffer = 0p; 509 heapReserve = 0; 510 nextFreeHeapManager = 0p; 511 #ifdef __CFA_DEBUG__ 512 allocUnfreed = 0; 513 #endif // __CFA_DEBUG__ 514 } // with 515 } // if 516 517 return heap; 518 } // getHeap 519 520 521 void heapManagerCtor() libcfa_public { 522 if ( unlikely( ! heapMasterBootFlag ) ) heapMasterCtor(); 523 524 lock( heapMaster.mgrLock ); // protect heapMaster counters 525 526 // get storage for heap manager 527 528 heapManager = getHeap(); 529 530 #ifdef __STATISTICS__ 531 HeapStatisticsCtor( heapManager->stats ); // heap local 532 heapMaster.threads_started += 1; 533 #endif // __STATISTICS__ 534 535 unlock( heapMaster.mgrLock ); 536 } // heapManagerCtor 537 538 539 void heapManagerDtor() libcfa_public { 540 lock( heapMaster.mgrLock ); 541 542 // place heap on list of free heaps for reusability 543 heapManager->nextFreeHeapManager = heapMaster.freeHeapManagersList; 544 heapMaster.freeHeapManagersList = heapManager; 545 546 #ifdef __STATISTICS__ 547 heapMaster.threads_exited += 1; 548 #endif // __STATISTICS__ 549 550 // Do not set heapManager to NULL because it is used after Cforall is shutdown but before the program shuts down. 
551 552 unlock( heapMaster.mgrLock ); 553 } // heapManagerDtor 272 554 273 555 274 556 //####################### Memory Allocation Routines Helpers #################### 275 557 276 277 #ifdef __CFA_DEBUG__278 static size_t allocUnfreed; // running total of allocations minus frees279 280 static void prtUnfreed() {281 if ( allocUnfreed != 0 ) {282 // DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.283 char helpText[512];284 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),285 "CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"286 "Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",287 (long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid288 } // if289 } // prtUnfreed290 558 291 559 extern int cfa_main_returned; // from interpose.cfa 292 560 extern "C" { 561 void memory_startup( void ) { 562 if ( ! heapMasterBootFlag ) heapManagerCtor(); // sanity check 563 } // memory_startup 564 565 void memory_shutdown( void ) { 566 heapManagerDtor(); 567 } // memory_shutdown 568 293 569 void heapAppStart() { // called by __cfaabi_appready_startup 294 allocUnfreed = 0; 570 verify( heapManager ); 571 #ifdef __CFA_DEBUG__ 572 heapManager->allocUnfreed = 0; // clear prior allocation counts 573 #endif // __CFA_DEBUG__ 574 575 #ifdef __STATISTICS__ 576 HeapStatisticsCtor( heapManager->stats ); // clear prior statistic counters 577 #endif // __STATISTICS__ 295 578 } // heapAppStart 296 579 297 580 void heapAppStop() { // called by __cfaabi_appready_startdown 298 fclose( stdin ); fclose( stdout ); 299 if ( cfa_main_returned ) prtUnfreed(); // do not check unfreed storage if exit called 581 fclose( stdin ); fclose( stdout ); // free buffer storage 582 if ( ! cfa_main_returned ) return; // do not check unfreed storage if exit called 583 584 #ifdef __CFA_DEBUG__ 585 // allocUnfreed is set to 0 when a heap is created and it accumulates any unfreed storage during its multiple thread 586 // usages. At the end, add up each heap allocUnfreed value across all heaps to get the total unfreed storage. 587 int64_t allocUnfreed = 0; 588 for ( Heap * heap = heapMaster.heapManagersList; heap; heap = heap->nextHeapManager ) { 589 allocUnfreed += heap->allocUnfreed; 590 } // for 591 592 allocUnfreed -= malloc_unfreed(); // subtract any user specified unfreed storage 593 if ( allocUnfreed > 0 ) { 594 // DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT. 
595 char helpText[512]; 596 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 597 "CFA warning (UNIX pid:%ld) : program terminating with %ju(0x%jx) bytes of storage allocated but not freed.\n" 598 "Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n", 599 (long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid 600 } // if 601 #endif // __CFA_DEBUG__ 300 602 } // heapAppStop 301 603 } // extern "C" 302 #endif // __CFA_DEBUG__303 604 304 605 305 606 #ifdef __STATISTICS__ 306 607 static HeapStatistics stats; // zero filled 307 static unsigned int sbrk_calls;308 static unsigned long long int sbrk_storage;309 // Statistics file descriptor (changed by malloc_stats_fd).310 static int stats_fd = STDERR_FILENO; // default stderr311 608 312 609 #define prtFmt \ … … 321 618 " realloc >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" \ 322 619 " free !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n" \ 323 " sbrk calls %'u; storage %'llu bytes\n" \ 324 " mmap calls %'u; storage %'llu / %'llu bytes\n" \ 325 " munmap calls %'u; storage %'llu / %'llu bytes\n" \ 620 " return pulls %'u; pushes %'u; storage %'llu / %'llu bytes\n" \ 621 " sbrk calls %'u; storage %'llu bytes\n" \ 622 " mmap calls %'u; storage %'llu / %'llu bytes\n" \ 623 " munmap calls %'u; storage %'llu / %'llu bytes\n" \ 624 " threads started %'lu; exited %'lu\n" \ 625 " heaps new %'lu; reused %'lu\n" 326 626 327 627 // Use "write" because streams may be shutdown when calls are made. 328 static int printStats( ) {// see malloc_stats628 static int printStats( HeapStatistics & stats ) with( heapMaster, stats ) { // see malloc_stats 329 629 char helpText[sizeof(prtFmt) + 1024]; // space for message and values 330 return __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), prtFmt, 331 stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc, 332 stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc, 333 stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc, 334 stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc, 335 stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc, 336 stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc, 337 stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc, 338 stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc, 339 stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc, 630 return __cfaabi_bits_print_buffer( stats_fd, helpText, sizeof(helpText), prtFmt, 631 malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc, 632 aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc, 633 calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc, 634 memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc, 635 amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc, 636 cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc, 637 resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc, 638 
realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc, 639 free_calls, free_null_calls, free_storage_request, free_storage_alloc, 640 return_pulls, return_pushes, return_storage_request, return_storage_alloc, 340 641 sbrk_calls, sbrk_storage, 341 stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc, 342 stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc 642 mmap_calls, mmap_storage_request, mmap_storage_alloc, 643 munmap_calls, munmap_storage_request, munmap_storage_alloc, 644 threads_started, threads_exited, 645 new_heap, reused_heap 343 646 ); 344 647 } // printStats … … 358 661 "<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \ 359 662 "<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \ 663 "<total type=\"return\" pulls=\"%'u;\" 0 pushes=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \ 360 664 "<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" \ 361 665 "<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n" \ 362 666 "<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \ 667 "<total type=\"threads\" started=\"%'lu;\" exited=\"%'lu\"/>\n" \ 668 "<total type=\"heaps\" new=\"%'lu;\" reused=\"%'lu\"/>\n" \ 363 669 "</malloc>" 364 670 365 static int printStatsXML( FILE * stream ) {// see malloc_info671 static int printStatsXML( HeapStatistics & stats, FILE * stream ) with( heapMaster, stats ) { // see malloc_info 366 672 char helpText[sizeof(prtFmtXML) + 1024]; // space for message and values 367 673 return __cfaabi_bits_print_buffer( fileno( stream ), helpText, sizeof(helpText), prtFmtXML, 368 stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc, 369 stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc, 370 stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc, 371 stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc, 372 stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc, 373 stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc, 374 stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc, 375 stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc, 376 stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc, 674 malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc, 675 aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc, 676 calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc, 677 memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc, 678 amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc, 679 cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc, 680 resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc, 681 realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc, 682 free_calls, free_null_calls, free_storage_request, free_storage_alloc, 683 return_pulls, return_pushes, return_storage_request, return_storage_alloc, 377 684 sbrk_calls, sbrk_storage, 378 
stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc, 379 stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc 685 mmap_calls, mmap_storage_request, mmap_storage_alloc, 686 munmap_calls, munmap_storage_request, munmap_storage_alloc, 687 threads_started, threads_exited, 688 new_heap, reused_heap 380 689 ); 381 690 } // printStatsXML 691 692 static HeapStatistics & collectStats( HeapStatistics & stats ) with( heapMaster ) { 693 lock( mgrLock ); 694 695 stats += heapMaster.stats; 696 for ( Heap * heap = heapManagersList; heap; heap = heap->nextHeapManager ) { 697 stats += heap->stats; 698 } // for 699 700 unlock( mgrLock ); 701 return stats; 702 } // collectStats 382 703 #endif // __STATISTICS__ 383 704 384 705 385 // statically allocated variables => zero filled. 386 static size_t heapExpand; // sbrk advance 387 static size_t mmapStart; // cross over point for mmap 388 static unsigned int maxBucketsUsed; // maximum number of buckets in use 389 // extern visibility, used by runtime kernel 390 // would be cool to remove libcfa_public but it's needed for libcfathread 391 libcfa_public size_t __page_size; // architecture pagesize 392 libcfa_public int __map_prot; // common mmap/mprotect protection 393 394 395 // thunk problem 396 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) { 397 size_t l = 0, m, h = dim; 398 while ( l < h ) { 399 m = (l + h) / 2; 400 if ( (unsigned int &)(vals[m]) < key ) { // cast away const 401 l = m + 1; 402 } else { 403 h = m; 404 } // if 405 } // while 406 return l; 407 } // Bsearchl 408 409 410 static inline bool setMmapStart( size_t value ) { // true => mmapped, false => sbrk 706 static bool setMmapStart( size_t value ) with( heapMaster ) { // true => mmapped, false => sbrk 411 707 if ( value < __page_size || bucketSizes[NoBucketSizes - 1] < value ) return false; 412 708 mmapStart = value; // set global 413 709 414 710 // find the closest bucket size less than or equal to the mmapStart size 415 maxBucketsUsed = Bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search416 assert( maxBucketsUsed < NoBucketSizes ); // subscript failure ?417 assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?711 maxBucketsUsed = Bsearchl( mmapStart, bucketSizes, NoBucketSizes ); // binary search 712 verify( maxBucketsUsed < NoBucketSizes ); // subscript failure ? 713 verify( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ? 418 714 return true; 419 715 } // setMmapStart … … 438 734 439 735 440 static inline void checkAlign( size_t alignment ) { 736 inline __attribute__((always_inline)) 737 static void checkAlign( size_t alignment ) { 441 738 if ( unlikely( alignment < libAlign() || ! is_pow2( alignment ) ) ) { 442 739 abort( "**** Error **** alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() ); … … 445 742 446 743 447 static inline void checkHeader( bool check, const char name[], void * addr ) { 744 inline __attribute__((always_inline)) 745 static void checkHeader( bool check, const char name[], void * addr ) { 448 746 if ( unlikely( check ) ) { // bad address ? 
449 747 abort( "**** Error **** attempt to %s storage %p with address outside the heap.\n" … … 470 768 471 769 472 static inline void fakeHeader( Heap.Storage.Header *& header, size_t & alignment ) { 770 inline __attribute__((always_inline)) 771 static void fakeHeader( Heap.Storage.Header *& header, size_t & alignment ) { 473 772 if ( unlikely( AlignmentBit( header ) ) ) { // fake header ? 474 773 alignment = ClearAlignmentBit( header ); // clear flag from value … … 483 782 484 783 485 static inline bool headers( const char name[] __attribute__(( unused )), void * addr, Heap.Storage.Header *& header, 486 Heap.FreeHeader *& freeHead, size_t & size, size_t & alignment ) with( heapManager ) { 784 inline __attribute__((always_inline)) 785 static bool headers( const char name[] __attribute__(( unused )), void * addr, Heap.Storage.Header *& header, 786 Heap.FreeHeader *& freeHead, size_t & size, size_t & alignment ) with( heapMaster, *heapManager ) { 487 787 header = HeaderAddr( addr ); 488 788 … … 509 809 checkHeader( header < (Heap.Storage.Header *)heapBegin || (Heap.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -) 510 810 811 Heap * homeManager; 511 812 if ( unlikely( freeHead == 0p || // freed and only free-list node => null link 512 813 // freed and link points at another free block not to a bucket in the bucket array. 513 freeHead < &freeLists[0] || &freeLists[NoBucketSizes] <= freeHead ) ) { 814 (homeManager = freeHead->homeManager, freeHead < &homeManager->freeLists[0] || 815 &homeManager->freeLists[NoBucketSizes] <= freeHead ) ) ) { 514 816 abort( "**** Error **** attempt to %s storage %p with corrupted header.\n" 515 817 "Possible cause is duplicate free on same block or overwriting of header information.", … … 521 823 } // headers 522 824 523 // #ifdef __CFA_DEBUG__ 524 // #if __SIZEOF_POINTER__ == 4 525 // #define MASK 0xdeadbeef 526 // #else 527 // #define MASK 0xdeadbeefdeadbeef 528 // #endif 529 // #define STRIDE size_t 530 531 // static void * Memset( void * addr, STRIDE size ) { // debug only 532 // if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) ); 533 // if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) ); 534 535 // STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE); 536 // for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK; 537 // return addr; 538 // } // Memset 539 // #endif // __CFA_DEBUG__ 540 541 542 #define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes." 543 544 static inline void * extend( size_t size ) with( heapManager ) { 545 lock( extlock __cfaabi_dbg_ctx2 ); 825 826 static void * master_extend( size_t size ) with( heapMaster ) { 827 lock( extLock ); 546 828 547 829 ptrdiff_t rem = heapRemaining - size; … … 549 831 // If the size requested is bigger than the current remaining storage, increase the size of the heap. 550 832 551 size_t increase = ceiling2( size > heapExpand ? size : heapExpand, __page_size);833 size_t increase = ceiling2( size > heapExpand ? size : heapExpand, libAlign() ); 552 834 // Do not call abort or strerror( errno ) as they may call malloc. 553 if ( sbrk( increase ) == (void *)-1 ) { // failed, no memory ? 
554 unlock( extlock ); 555 __cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size ); 556 _exit( EXIT_FAILURE ); // give up 835 if ( unlikely( sbrk( increase ) == (void *)-1 ) ) { // failed, no memory ? 836 unlock( extLock ); 837 abort( NO_MEMORY_MSG, size ); // no memory 557 838 } // if 558 839 559 840 // Make storage executable for thunks. 560 841 if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) { 561 unlock( extlock ); 562 __cfaabi_bits_print_nolock( STDERR_FILENO, "extend() : internal error, mprotect failure, heapEnd:%p size:%zd, errno:%d.\n", heapEnd, increase, errno ); 563 _exit( EXIT_FAILURE ); 564 } // if 842 unlock( extLock ); 843 abort( "**** Error **** attempt to make heap storage executable for thunks and mprotect failed with errno %d.", errno ); 844 } // if 845 846 rem = heapRemaining + increase - size; 565 847 566 848 #ifdef __STATISTICS__ … … 568 850 sbrk_storage += increase; 569 851 #endif // __STATISTICS__ 570 571 #ifdef __CFA_DEBUG__572 // Set new memory to garbage so subsequent uninitialized usages might fail.573 memset( (char *)heapEnd + heapRemaining, '\xde', increase );574 //Memset( (char *)heapEnd + heapRemaining, increase );575 #endif // __CFA_DEBUG__576 577 rem = heapRemaining + increase - size;578 852 } // if 579 853 … … 581 855 heapRemaining = rem; 582 856 heapEnd = (char *)heapEnd + size; 583 unlock( extlock ); 857 858 unlock( extLock ); 584 859 return block; 585 } // extend 586 587 588 static inline void * doMalloc( size_t size ) with( heapManager ) { 589 Heap.Storage * block; // pointer to new block of storage 860 } // master_extend 861 862 863 __attribute__(( noinline )) 864 static void * manager_extend( size_t size ) with( *heapManager ) { 865 ptrdiff_t rem = heapReserve - size; 866 867 if ( unlikely( rem < 0 ) ) { // negative 868 // If the size requested is bigger than the current remaining reserve, use the current reserve to populate 869 // smaller freeLists, and increase the reserve. 870 871 rem = heapReserve; // positive 872 873 if ( rem >= bucketSizes[0] ) { // minimal size ? otherwise ignore 874 size_t bucket; 875 #ifdef FASTLOOKUP 876 if ( likely( rem < LookupSizes ) ) bucket = lookup[rem]; 877 #endif // FASTLOOKUP 878 bucket = Bsearchl( rem, bucketSizes, heapMaster.maxBucketsUsed ); 879 verify( 0 <= bucket && bucket <= heapMaster.maxBucketsUsed ); 880 Heap.FreeHeader * freeHead = &(freeLists[bucket]); 881 882 // The remaining storage many not be bucket size, whereas all other allocations are. Round down to previous 883 // bucket size in this case. 884 if ( unlikely( freeHead->blockSize > (size_t)rem ) ) freeHead -= 1; 885 Heap.Storage * block = (Heap.Storage *)heapBuffer; 886 887 block->header.kind.real.next = freeHead->freeList; // push on stack 888 freeHead->freeList = block; 889 } // if 890 891 size_t increase = ceiling( size > ( heapMaster.heapExpand / 10 ) ? size : ( heapMaster.heapExpand / 10 ), libAlign() ); 892 heapBuffer = master_extend( increase ); 893 rem = increase - size; 894 } // if 895 896 Heap.Storage * block = (Heap.Storage *)heapBuffer; 897 heapReserve = rem; 898 heapBuffer = (char *)heapBuffer + size; 899 900 return block; 901 } // manager_extend 902 903 904 #define BOOT_HEAP_MANAGER \ 905 if ( unlikely( ! 
heapMasterBootFlag ) ) { \ 906 heapManagerCtor(); /* trigger for first heap */ \ 907 } /* if */ 908 909 #ifdef __STATISTICS__ 910 #define STAT_NAME __counter 911 #define STAT_PARM , unsigned int STAT_NAME 912 #define STAT_ARG( name ) , name 913 #define STAT_0_CNT( counter ) stats.counters[counter].calls_0 += 1 914 #else 915 #define STAT_NAME 916 #define STAT_PARM 917 #define STAT_ARG( name ) 918 #define STAT_0_CNT( counter ) 919 #endif // __STATISTICS__ 920 921 #define PROLOG( counter, ... ) \ 922 BOOT_HEAP_MANAGER; \ 923 if ( unlikely( size == 0 ) || /* 0 BYTE ALLOCATION RETURNS NULL POINTER */ \ 924 unlikely( size > ULONG_MAX - sizeof(Heap.Storage) ) ) { /* error check */ \ 925 STAT_0_CNT( counter ); \ 926 __VA_ARGS__; \ 927 return 0p; \ 928 } /* if */ 929 930 931 #define SCRUB_SIZE 1024lu 932 // Do not use '\xfe' for scrubbing because dereferencing an address composed of it causes a SIGSEGV *without* a valid IP 933 // pointer in the interrupt frame. 934 #define SCRUB '\xff' 935 936 static void * doMalloc( size_t size STAT_PARM ) libcfa_nopreempt with( *heapManager ) { 937 PROLOG( STAT_NAME ); 938 939 verify( heapManager ); 940 Heap.Storage * block; // pointer to new block of storage 590 941 591 942 // Look up size in the size list. Make sure the user request includes space for the header that must be allocated 592 943 // along with the block and is a multiple of the alignment size. 593 594 944 size_t tsize = size + sizeof(Heap.Storage); 595 945 596 if ( likely( tsize < mmapStart ) ) { // small size => sbrk 597 size_t posn; 946 #ifdef __STATISTICS__ 947 stats.counters[STAT_NAME].calls += 1; 948 stats.counters[STAT_NAME].request += size; 949 #endif // __STATISTICS__ 950 951 #ifdef __CFA_DEBUG__ 952 allocUnfreed += size; 953 #endif // __CFA_DEBUG__ 954 955 if ( likely( tsize < heapMaster.mmapStart ) ) { // small size => sbrk 956 size_t bucket; 598 957 #ifdef FASTLOOKUP 599 if ( tsize < LookupSizes ) posn= lookup[tsize];958 if ( likely( tsize < LookupSizes ) ) bucket = lookup[tsize]; 600 959 else 601 960 #endif // FASTLOOKUP 602 posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed ); 603 Heap.FreeHeader * freeElem = &freeLists[posn]; 604 verify( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ? 605 verify( tsize <= freeElem->blockSize ); // search failure ? 606 tsize = freeElem->blockSize; // total space needed for request 961 bucket = Bsearchl( tsize, bucketSizes, heapMaster.maxBucketsUsed ); 962 verify( 0 <= bucket && bucket <= heapMaster.maxBucketsUsed ); 963 Heap.FreeHeader * freeHead = &freeLists[bucket]; 964 965 verify( freeHead <= &freeLists[heapMaster.maxBucketsUsed] ); // subscripting error ? 966 verify( tsize <= freeHead->blockSize ); // search failure ? 967 968 tsize = freeHead->blockSize; // total space needed for request 969 #ifdef __STATISTICS__ 970 stats.counters[STAT_NAME].alloc += tsize; 971 #endif // __STATISTICS__ 607 972 608 973 // Spin until the lock is acquired for this particular size of block. 609 974 610 975 #if BUCKETLOCK == SPINLOCK 611 lock( freeElem->lock __cfaabi_dbg_ctx2 ); 612 block = freeElem->freeList; // remove node from stack 976 block = freeHead->freeList; // remove node from stack 613 977 #else 614 block = pop( free Elem->freeList );978 block = pop( freeHead->freeList ); 615 979 #endif // BUCKETLOCK 616 980 if ( unlikely( block == 0p ) ) { // no free block ? 981 #ifdef OWNERSHIP 982 // Freelist for that size is empty, so carve it out of the heap, if there is enough left, or get some more 983 // and then carve it off. 
984 #ifdef RETURNSPIN 617 985 #if BUCKETLOCK == SPINLOCK 618 unlock( freeElem->lock ); 986 lock( freeHead->returnLock ); 987 block = freeHead->returnList; 988 freeHead->returnList = 0p; 989 unlock( freeHead->returnLock ); 990 #else 991 block = __atomic_exchange_n( &freeHead->returnList, nullptr, __ATOMIC_SEQ_CST ); 992 #endif // RETURNSPIN 993 994 if ( likely( block == 0p ) ) { // return list also empty? 995 #endif // OWNERSHIP 996 // Do not leave kernel thread as manager_extend accesses heapManager. 997 disable_interrupts(); 998 block = (Heap.Storage *)manager_extend( tsize ); // mutual exclusion on call 999 enable_interrupts( false ); 1000 1001 // OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED. 1002 1003 #ifdef __CFA_DEBUG__ 1004 // Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first 1024 bytes. 1005 memset( block->data, SCRUB, min( SCRUB_SIZE, tsize - sizeof(Heap.Storage) ) ); 1006 #endif // __CFA_DEBUG__ 619 1007 #endif // BUCKETLOCK 620 621 // Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more 622 // and then carve it off. 623 624 block = (Heap.Storage *)extend( tsize ); // mutual exclusion on call 625 #if BUCKETLOCK == SPINLOCK 1008 #ifdef OWNERSHIP 1009 } else { // merge returnList into freeHead 1010 #ifdef __STATISTICS__ 1011 stats.return_pulls += 1; 1012 #endif // __STATISTICS__ 1013 1014 // OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED. 1015 1016 freeHead->freeList = block->header.kind.real.next; 1017 } // if 1018 #endif // OWNERSHIP 626 1019 } else { 627 freeElem->freeList = block->header.kind.real.next; 628 unlock( freeElem->lock ); 629 #endif // BUCKETLOCK 630 } // if 631 632 block->header.kind.real.home = freeElem; // pointer back to free list of apropriate size 1020 // Memory is scrubbed in doFree. 1021 freeHead->freeList = block->header.kind.real.next; 1022 } // if 1023 1024 block->header.kind.real.home = freeHead; // pointer back to free list of apropriate size 633 1025 } else { // large size => mmap 634 1026 if ( unlikely( size > ULONG_MAX - __page_size ) ) return 0p; 635 1027 tsize = ceiling2( tsize, __page_size ); // must be multiple of page size 636 1028 #ifdef __STATISTICS__ 637 __atomic_add_fetch( &stats.mmap_calls, 1, __ATOMIC_SEQ_CST ); 638 __atomic_add_fetch( &stats.mmap_storage_request, size, __ATOMIC_SEQ_CST ); 639 __atomic_add_fetch( &stats.mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST ); 1029 stats.counters[STAT_NAME].alloc += tsize; 1030 stats.mmap_calls += 1; 1031 stats.mmap_storage_request += size; 1032 stats.mmap_storage_alloc += tsize; 640 1033 #endif // __STATISTICS__ 641 1034 642 block = (Heap.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 ); 643 if ( block == (Heap.Storage *)MAP_FAILED ) { // failed ? 1035 disable_interrupts(); 1036 block = (Heap.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 ); 1037 enable_interrupts( false ); 1038 1039 // OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED. 1040 1041 if ( unlikely( block == (Heap.Storage *)MAP_FAILED ) ) { // failed ? 644 1042 if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory 645 1043 // Do not call strerror( errno ) as it may call malloc. 
646 abort( "(Heap &)0x%p.doMalloc() : internal error, mmap failure, size:%zu errno:%d.", &heapManager, tsize, errno ); 647 } //if 1044 abort( "**** Error **** attempt to allocate large object (> %zu) of size %zu bytes and mmap failed with errno %d.", size, heapMaster.mmapStart, errno ); 1045 } // if 1046 block->header.kind.real.blockSize = MarkMmappedBit( tsize ); // storage size for munmap 1047 648 1048 #ifdef __CFA_DEBUG__ 649 // S et new memory to garbage so subsequent uninitialized usages might fail.650 memset( block, '\xde', tsize );651 //Memset( block, tsize);1049 // Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first 1024 bytes. The rest of 1050 // the storage set to 0 by mmap. 1051 memset( block->data, SCRUB, min( SCRUB_SIZE, tsize - sizeof(Heap.Storage) ) ); 652 1052 #endif // __CFA_DEBUG__ 653 block->header.kind.real.blockSize = MarkMmappedBit( tsize ); // storage size for munmap654 1053 } // if 655 1054 … … 659 1058 660 1059 #ifdef __CFA_DEBUG__ 661 __atomic_add_fetch( &allocUnfreed, tsize, __ATOMIC_SEQ_CST );662 1060 if ( traceHeap() ) { 663 1061 char helpText[64]; … … 667 1065 #endif // __CFA_DEBUG__ 668 1066 1067 // poll_interrupts(); // call rollforward 1068 669 1069 return addr; 670 1070 } // doMalloc 671 1071 672 1072 673 static inline void doFree( void * addr ) with( heapManager ) { 1073 static void doFree( void * addr ) libcfa_nopreempt with( *heapManager ) { 1074 verify( addr ); 1075 1076 // detect free after thread-local storage destruction and use global stats in that case 1077 1078 Heap.Storage.Header * header; 1079 Heap.FreeHeader * freeHead; 1080 size_t size, alignment; 1081 1082 bool mapped = headers( "free", addr, header, freeHead, size, alignment ); 1083 #if defined( __STATISTICS__ ) || defined( __CFA_DEBUG__ ) 1084 size_t rsize = header->kind.real.size; // optimization 1085 #endif // __STATISTICS__ || __CFA_DEBUG__ 1086 1087 #ifdef __STATISTICS__ 1088 stats.free_storage_request += rsize; 1089 stats.free_storage_alloc += size; 1090 #endif // __STATISTICS__ 1091 674 1092 #ifdef __CFA_DEBUG__ 675 if ( unlikely( heapManager.heapBegin == 0p ) ) { 676 abort( "doFree( %p ) : internal error, called before heap is initialized.", addr ); 677 } // if 1093 allocUnfreed -= rsize; 678 1094 #endif // __CFA_DEBUG__ 679 1095 680 Heap.Storage.Header * header; 681 Heap.FreeHeader * freeElem; 682 size_t size, alignment; // not used (see realloc) 683 684 if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ? 1096 if ( unlikely( mapped ) ) { // mmapped ? 685 1097 #ifdef __STATISTICS__ 686 __atomic_add_fetch( &stats.munmap_calls, 1, __ATOMIC_SEQ_CST );687 __atomic_add_fetch( &stats.munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );688 __atomic_add_fetch( &stats.munmap_storage_alloc, size, __ATOMIC_SEQ_CST );1098 stats.munmap_calls += 1; 1099 stats.munmap_storage_request += rsize; 1100 stats.munmap_storage_alloc += size; 689 1101 #endif // __STATISTICS__ 690 if ( munmap( header, size ) == -1 ) { 691 abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n" 692 "Possible cause is invalid pointer.", 693 addr ); 1102 1103 // OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED. 1104 1105 // Does not matter where this storage is freed. 1106 if ( unlikely( munmap( header, size ) == -1 ) ) { 1107 // Do not call strerror( errno ) as it may call malloc. 
1108 abort( "**** Error **** attempt to deallocate large object %p and munmap failed with errno %d.\n" 1109 "Possible cause is invalid delete pointer: either not allocated or with corrupt header.", 1110 addr, errno ); 694 1111 } // if 695 1112 } else { 696 1113 #ifdef __CFA_DEBUG__ 697 // Set free memory to garbage so subsequent usages might fail. 698 memset( ((Heap.Storage *)header)->data, '\xde', freeElem->blockSize - sizeof( Heap.Storage ) ); 699 //Memset( ((Heap.Storage *)header)->data, freeElem->blockSize - sizeof( Heap.Storage ) ); 1114 // memset is NOT always inlined! 1115 disable_interrupts(); 1116 // Scrub old memory so subsequent usages might fail. Only scrub the first/last SCRUB_SIZE bytes. 1117 char * data = ((Heap.Storage *)header)->data; // data address 1118 size_t dsize = size - sizeof(Heap.Storage); // data size 1119 if ( dsize <= SCRUB_SIZE * 2 ) { 1120 memset( data, SCRUB, dsize ); // scrub all 1121 } else { 1122 memset( data, SCRUB, SCRUB_SIZE ); // scrub front 1123 memset( data + dsize - SCRUB_SIZE, SCRUB, SCRUB_SIZE ); // scrub back 1124 } // if 1125 enable_interrupts( false ); 700 1126 #endif // __CFA_DEBUG__ 701 1127 702 #ifdef __STATISTICS__ 703 __atomic_add_fetch( &stats.free_calls, 1, __ATOMIC_SEQ_CST ); 704 __atomic_add_fetch( &stats.free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST ); 705 __atomic_add_fetch( &stats.free_storage_alloc, size, __ATOMIC_SEQ_CST ); 706 #endif // __STATISTICS__ 707 708 #if BUCKETLOCK == SPINLOCK 709 lock( freeElem->lock __cfaabi_dbg_ctx2 ); // acquire spin lock 710 header->kind.real.next = freeElem->freeList; // push on stack 711 freeElem->freeList = (Heap.Storage *)header; 712 unlock( freeElem->lock ); // release spin lock 713 #else 714 push( freeElem->freeList, *(Heap.Storage *)header ); 715 #endif // BUCKETLOCK 1128 if ( likely( heapManager == freeHead->homeManager ) ) { // belongs to this thread 1129 header->kind.real.next = freeHead->freeList; // push on stack 1130 freeHead->freeList = (Heap.Storage *)header; 1131 } else { // return to thread owner 1132 verify( heapManager ); 1133 1134 #ifdef OWNERSHIP 1135 #ifdef RETURNSPIN 1136 lock( freeHead->returnLock ); 1137 header->kind.real.next = freeHead->returnList; // push to bucket return list 1138 freeHead->returnList = (Heap.Storage *)header; 1139 unlock( freeHead->returnLock ); 1140 #else // lock free 1141 header->kind.real.next = freeHead->returnList; // link new node to top node 1142 // CAS resets header->kind.real.next = freeHead->returnList on failure 1143 while ( ! __atomic_compare_exchange_n( &freeHead->returnList, &header->kind.real.next, header, 1144 false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ); 1145 #endif // RETURNSPIN 1146 1147 #else // no OWNERSHIP 1148 1149 freeHead = &heap->freeLists[ClearStickyBits( header->kind.real.home ) - &freeHead->homeManager->freeLists[0]]; 1150 header->kind.real.next = freeHead->freeList; // push on stack 1151 freeHead->freeList = (Heap.Storage *)header; 1152 #endif // ! OWNERSHIP 1153 1154 #ifdef __U_STATISTICS__ 1155 stats.return_pushes += 1; 1156 stats.return_storage_request += rsize; 1157 stats.return_storage_alloc += size; 1158 #endif // __U_STATISTICS__ 1159 1160 // OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED. 
1161 } // if 716 1162 } // if 717 1163 718 1164 #ifdef __CFA_DEBUG__ 719 __atomic_add_fetch( &allocUnfreed, -size, __ATOMIC_SEQ_CST );720 1165 if ( traceHeap() ) { 721 1166 char helpText[64]; … … 724 1169 } // if 725 1170 #endif // __CFA_DEBUG__ 1171 1172 // poll_interrupts(); // call rollforward 726 1173 } // doFree 727 1174 728 1175 729 s tatic size_t prtFree( Heap & manager ) with( manager ) {1176 size_t prtFree( Heap & manager ) with( manager ) { 730 1177 size_t total = 0; 731 1178 #ifdef __STATISTICS__ … … 733 1180 __cfaabi_bits_print_nolock( STDERR_FILENO, "\nBin lists (bin size : free blocks on list)\n" ); 734 1181 #endif // __STATISTICS__ 735 for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {1182 for ( unsigned int i = 0; i < heapMaster.maxBucketsUsed; i += 1 ) { 736 1183 size_t size = freeLists[i].blockSize; 737 1184 #ifdef __STATISTICS__ … … 764 1211 __cfaabi_bits_release(); 765 1212 #endif // __STATISTICS__ 766 return (char *)heap End - (char *)heapBegin - total;1213 return (char *)heapMaster.heapEnd - (char *)heapMaster.heapBegin - total; 767 1214 } // prtFree 768 1215 769 1216 770 static void ?{}( Heap & manager ) with( manager ) { 771 __page_size = sysconf( _SC_PAGESIZE ); 772 __map_prot = PROT_READ | PROT_WRITE | PROT_EXEC; 773 774 for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists 775 freeLists[i].blockSize = bucketSizes[i]; 776 } // for 777 778 #ifdef FASTLOOKUP 779 unsigned int idx = 0; 780 for ( unsigned int i = 0; i < LookupSizes; i += 1 ) { 781 if ( i > bucketSizes[idx] ) idx += 1; 782 lookup[i] = idx; 783 } // for 784 #endif // FASTLOOKUP 785 786 if ( ! setMmapStart( malloc_mmap_start() ) ) { 787 abort( "Heap : internal error, mmap start initialization failure." ); 788 } // if 789 heapExpand = malloc_expansion(); 790 791 char * end = (char *)sbrk( 0 ); 792 heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, __page_size ) - end ); // move start of heap to multiple of alignment 793 } // Heap 794 795 796 static void ^?{}( Heap & ) { 797 #ifdef __STATISTICS__ 798 if ( traceHeapTerm() ) { 799 printStats(); 800 // prtUnfreed() called in heapAppStop() 801 } // if 802 #endif // __STATISTICS__ 803 } // ~Heap 804 805 806 static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) )); 807 void memory_startup( void ) { 808 #ifdef __CFA_DEBUG__ 809 if ( heapBoot ) { // check for recursion during system boot 810 abort( "boot() : internal error, recursively invoked during system boot." ); 811 } // if 812 heapBoot = true; 813 #endif // __CFA_DEBUG__ 814 815 //verify( heapManager.heapBegin != 0 ); 816 //heapManager{}; 817 if ( heapManager.heapBegin == 0p ) heapManager{}; // sanity check 818 } // memory_startup 819 820 static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) )); 821 void memory_shutdown( void ) { 822 ^heapManager{}; 823 } // memory_shutdown 824 825 826 static inline void * mallocNoStats( size_t size ) { // necessary for malloc statistics 827 verify( heapManager.heapBegin != 0p ); // called before memory_startup ? 
828 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER 829 830 #if __SIZEOF_POINTER__ == 8 831 verify( size < ((typeof(size_t))1 << 48) ); 832 #endif // __SIZEOF_POINTER__ == 8 833 return doMalloc( size ); 834 } // mallocNoStats 835 836 837 static inline void * memalignNoStats( size_t alignment, size_t size ) { 838 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER 839 840 #ifdef __CFA_DEBUG__ 1217 #ifdef __STATISTICS__ 1218 static void incCalls( intptr_t statName ) libcfa_nopreempt { 1219 heapManager->stats.counters[statName].calls += 1; 1220 } // incCalls 1221 1222 static void incZeroCalls( intptr_t statName ) libcfa_nopreempt { 1223 heapManager->stats.counters[statName].calls_0 += 1; 1224 } // incZeroCalls 1225 #endif // __STATISTICS__ 1226 1227 #ifdef __CFA_DEBUG__ 1228 static void incUnfreed( intptr_t offset ) libcfa_nopreempt { 1229 heapManager->allocUnfreed += offset; 1230 } // incUnfreed 1231 #endif // __CFA_DEBUG__ 1232 1233 1234 static void * memalignNoStats( size_t alignment, size_t size STAT_PARM ) { 841 1235 checkAlign( alignment ); // check alignment 842 #endif // __CFA_DEBUG__ 843 844 // if alignment <= default alignment, do normal malloc as two headers are unnecessary 845 if ( unlikely( alignment <= libAlign() ) ) return mallocNoStats( size ); 1236 1237 // if alignment <= default alignment or size == 0, do normal malloc as two headers are unnecessary 1238 if ( unlikely( alignment <= libAlign() || size == 0 ) ) return doMalloc( size STAT_ARG( STAT_NAME ) ); 846 1239 847 1240 // Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for … … 854 1247 // subtract libAlign() because it is already the minimum alignment 855 1248 // add sizeof(Storage) for fake header 856 char * addr = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(Heap.Storage) ); 1249 size_t offset = alignment - libAlign() + sizeof(Heap.Storage); 1250 char * addr = (char *)doMalloc( size + offset STAT_ARG( STAT_NAME ) ); 857 1251 858 1252 // address in the block of the "next" alignment address … … 860 1254 861 1255 // address of header from malloc 862 Heap.Storage.Header * RealHeader = HeaderAddr( addr ); 863 RealHeader->kind.real.size = size; // correct size to eliminate above alignment offset 864 // address of fake header * before* the alignment location 1256 Heap.Storage.Header * realHeader = HeaderAddr( addr ); 1257 realHeader->kind.real.size = size; // correct size to eliminate above alignment offset 1258 #ifdef __CFA_DEBUG__ 1259 incUnfreed( -offset ); // adjustment off the offset from call to doMalloc 1260 #endif // __CFA_DEBUG__ 1261 1262 // address of fake header *before* the alignment location 865 1263 Heap.Storage.Header * fakeHeader = HeaderAddr( user ); 1264 866 1265 // SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment 867 fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *) RealHeader;1266 fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader; 868 1267 // SKULLDUGGERY: odd alignment implies fake header 869 1268 fakeHeader->kind.fake.alignment = MarkAlignmentBit( alignment ); … … 880 1279 // then malloc() returns a unique pointer value that can later be successfully passed to free(). 
881 1280 void * malloc( size_t size ) libcfa_public { 882 #ifdef __STATISTICS__ 883 if ( likely( size > 0 ) ) { 884 __atomic_add_fetch( &stats.malloc_calls, 1, __ATOMIC_SEQ_CST ); 885 __atomic_add_fetch( &stats.malloc_storage_request, size, __ATOMIC_SEQ_CST ); 886 } else { 887 __atomic_add_fetch( &stats.malloc_0_calls, 1, __ATOMIC_SEQ_CST ); 888 } // if 889 #endif // __STATISTICS__ 890 891 return mallocNoStats( size ); 1281 return doMalloc( size STAT_ARG( MALLOC ) ); 892 1282 } // malloc 893 1283 … … 895 1285 // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes. 896 1286 void * aalloc( size_t dim, size_t elemSize ) libcfa_public { 897 size_t size = dim * elemSize; 898 #ifdef __STATISTICS__ 899 if ( likely( size > 0 ) ) { 900 __atomic_add_fetch( &stats.aalloc_calls, 1, __ATOMIC_SEQ_CST ); 901 __atomic_add_fetch( &stats.aalloc_storage_request, size, __ATOMIC_SEQ_CST ); 902 } else { 903 __atomic_add_fetch( &stats.aalloc_0_calls, 1, __ATOMIC_SEQ_CST ); 904 } // if 905 #endif // __STATISTICS__ 906 907 return mallocNoStats( size ); 1287 return doMalloc( dim * elemSize STAT_ARG( AALLOC ) ); 908 1288 } // aalloc 909 1289 … … 912 1292 void * calloc( size_t dim, size_t elemSize ) libcfa_public { 913 1293 size_t size = dim * elemSize; 914 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 915 #ifdef __STATISTICS__ 916 __atomic_add_fetch( &stats.calloc_0_calls, 1, __ATOMIC_SEQ_CST ); 917 #endif // __STATISTICS__ 918 return 0p; 919 } // if 920 #ifdef __STATISTICS__ 921 __atomic_add_fetch( &stats.calloc_calls, 1, __ATOMIC_SEQ_CST ); 922 __atomic_add_fetch( &stats.calloc_storage_request, dim * elemSize, __ATOMIC_SEQ_CST ); 923 #endif // __STATISTICS__ 924 925 char * addr = (char *)mallocNoStats( size ); 1294 char * addr = (char *)doMalloc( size STAT_ARG( CALLOC ) ); 1295 1296 if ( unlikely( addr == NULL ) ) return NULL; // stop further processing if 0p is returned 926 1297 927 1298 Heap.Storage.Header * header; 928 Heap.FreeHeader * free Elem;1299 Heap.FreeHeader * freeHead; 929 1300 size_t bsize, alignment; 930 1301 … … 932 1303 bool mapped = 933 1304 #endif // __CFA_DEBUG__ 934 headers( "calloc", addr, header, free Elem, bsize, alignment );1305 headers( "calloc", addr, header, freeHead, bsize, alignment ); 935 1306 936 1307 #ifndef __CFA_DEBUG__ 937 1308 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 938 if ( ! mapped)1309 if ( likely( ! mapped ) ) 939 1310 #endif // __CFA_DEBUG__ 940 1311 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined … … 952 1323 // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done. 953 1324 void * resize( void * oaddr, size_t size ) libcfa_public { 954 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 
955 if ( unlikely( size == 0 ) ) { // special cases 956 #ifdef __STATISTICS__ 957 __atomic_add_fetch( &stats.resize_0_calls, 1, __ATOMIC_SEQ_CST ); 958 #endif // __STATISTICS__ 959 free( oaddr ); 960 return 0p; 961 } // if 962 #ifdef __STATISTICS__ 963 __atomic_add_fetch( &stats.resize_calls, 1, __ATOMIC_SEQ_CST ); 964 #endif // __STATISTICS__ 965 966 if ( unlikely( oaddr == 0p ) ) { 967 #ifdef __STATISTICS__ 968 __atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST ); 969 #endif // __STATISTICS__ 970 return mallocNoStats( size ); 971 } // if 1325 if ( unlikely( oaddr == 0p ) ) { // => malloc( size ) 1326 return doMalloc( size STAT_ARG( RESIZE ) ); 1327 } // if 1328 1329 PROLOG( RESIZE, doFree( oaddr ) ); // => free( oaddr ) 972 1330 973 1331 Heap.Storage.Header * header; 974 Heap.FreeHeader * free Elem;1332 Heap.FreeHeader * freeHead; 975 1333 size_t bsize, oalign; 976 headers( "resize", oaddr, header, free Elem, bsize, oalign );1334 headers( "resize", oaddr, header, freeHead, bsize, oalign ); 977 1335 978 1336 size_t odsize = DataStorage( bsize, oaddr, header ); // data storage available in bucket … … 980 1338 if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size 981 1339 ClearZeroFillBit( header ); // no alignment and turn off 0 fill 1340 #ifdef __CFA_DEBUG__ 1341 incUnfreed( size - header->kind.real.size ); // adjustment off the size difference 1342 #endif // __CFA_DEBUG__ 982 1343 header->kind.real.size = size; // reset allocation size 1344 #ifdef __STATISTICS__ 1345 incCalls( RESIZE ); 1346 #endif // __STATISTICS__ 983 1347 return oaddr; 984 1348 } // if 985 1349 986 #ifdef __STATISTICS__987 __atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST );988 #endif // __STATISTICS__989 990 1350 // change size, DO NOT preserve STICKY PROPERTIES. 991 free( oaddr ); 992 return mallocNoStats( size ); // create new area 1351 doFree( oaddr ); // free previous storage 1352 1353 return doMalloc( size STAT_ARG( RESIZE ) ); // create new area 993 1354 } // resize 994 1355 … … 997 1358 // the old and new sizes. 998 1359 void * realloc( void * oaddr, size_t size ) libcfa_public { 999 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 
1000 if ( unlikely( size == 0 ) ) { // special cases 1001 #ifdef __STATISTICS__ 1002 __atomic_add_fetch( &stats.realloc_0_calls, 1, __ATOMIC_SEQ_CST ); 1003 #endif // __STATISTICS__ 1004 free( oaddr ); 1005 return 0p; 1006 } // if 1007 #ifdef __STATISTICS__ 1008 __atomic_add_fetch( &stats.realloc_calls, 1, __ATOMIC_SEQ_CST ); 1009 #endif // __STATISTICS__ 1010 1011 if ( unlikely( oaddr == 0p ) ) { 1012 #ifdef __STATISTICS__ 1013 __atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST ); 1014 #endif // __STATISTICS__ 1015 return mallocNoStats( size ); 1016 } // if 1360 if ( unlikely( oaddr == 0p ) ) { // => malloc( size ) 1361 return doMalloc( size STAT_ARG( REALLOC ) ); 1362 } // if 1363 1364 PROLOG( REALLOC, doFree( oaddr ) ); // => free( oaddr ) 1017 1365 1018 1366 Heap.Storage.Header * header; 1019 Heap.FreeHeader * free Elem;1367 Heap.FreeHeader * freeHead; 1020 1368 size_t bsize, oalign; 1021 headers( "realloc", oaddr, header, free Elem, bsize, oalign );1369 headers( "realloc", oaddr, header, freeHead, bsize, oalign ); 1022 1370 1023 1371 size_t odsize = DataStorage( bsize, oaddr, header ); // data storage available in bucket … … 1025 1373 bool ozfill = ZeroFillBit( header ); // old allocation zero filled 1026 1374 if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage 1027 header->kind.real.size = size; // reset allocation size 1375 #ifdef __CFA_DEBUG__ 1376 incUnfreed( size - header->kind.real.size ); // adjustment off the size difference 1377 #endif // __CFA_DEBUG__ 1378 header->kind.real.size = size; // reset allocation size 1028 1379 if ( unlikely( ozfill ) && size > osize ) { // previous request zero fill and larger ? 1029 1380 memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage 1030 1381 } // if 1382 #ifdef __STATISTICS__ 1383 incCalls( REALLOC ); 1384 #endif // __STATISTICS__ 1031 1385 return oaddr; 1032 1386 } // if 1033 1387 1034 #ifdef __STATISTICS__1035 __atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST );1036 #endif // __STATISTICS__1037 1038 1388 // change size and copy old content to new storage 1039 1389 1040 1390 void * naddr; 1041 if ( likely( oalign == libAlign() ) ) { // previous request not aligned ?1042 naddr = mallocNoStats( size );// create new area1391 if ( likely( oalign <= libAlign() ) ) { // previous request not aligned ? 1392 naddr = doMalloc( size STAT_ARG( REALLOC ) ); // create new area 1043 1393 } else { 1044 naddr = memalignNoStats( oalign, size ); // create new aligned area 1045 } // if 1046 1047 headers( "realloc", naddr, header, freeElem, bsize, oalign ); 1394 naddr = memalignNoStats( oalign, size STAT_ARG( REALLOC ) ); // create new aligned area 1395 } // if 1396 1397 headers( "realloc", naddr, header, freeHead, bsize, oalign ); 1398 // To preserve prior fill, the entire bucket must be copied versus the size. 1048 1399 memcpy( naddr, oaddr, min( osize, size ) ); // copy bytes 1049 free( oaddr );1400 doFree( oaddr ); // free previous storage 1050 1401 1051 1402 if ( unlikely( ozfill ) ) { // previous request zero fill ? … … 1067 1418 // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. 
(obsolete) 1068 1419 void * memalign( size_t alignment, size_t size ) libcfa_public { 1069 #ifdef __STATISTICS__ 1070 if ( likely( size > 0 ) ) { 1071 __atomic_add_fetch( &stats.memalign_calls, 1, __ATOMIC_SEQ_CST ); 1072 __atomic_add_fetch( &stats.memalign_storage_request, size, __ATOMIC_SEQ_CST ); 1073 } else { 1074 __atomic_add_fetch( &stats.memalign_0_calls, 1, __ATOMIC_SEQ_CST ); 1075 } // if 1076 #endif // __STATISTICS__ 1077 1078 return memalignNoStats( alignment, size ); 1420 return memalignNoStats( alignment, size STAT_ARG( MEMALIGN ) ); 1079 1421 } // memalign 1080 1422 … … 1082 1424 // Same as aalloc() with memory alignment. 1083 1425 void * amemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public { 1084 size_t size = dim * elemSize; 1085 #ifdef __STATISTICS__ 1086 if ( likely( size > 0 ) ) { 1087 __atomic_add_fetch( &stats.cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1088 __atomic_add_fetch( &stats.cmemalign_storage_request, size, __ATOMIC_SEQ_CST ); 1089 } else { 1090 __atomic_add_fetch( &stats.cmemalign_0_calls, 1, __ATOMIC_SEQ_CST ); 1091 } // if 1092 #endif // __STATISTICS__ 1093 1094 return memalignNoStats( alignment, size ); 1426 return memalignNoStats( alignment, dim * elemSize STAT_ARG( AMEMALIGN ) ); 1095 1427 } // amemalign 1096 1428 … … 1099 1431 void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public { 1100 1432 size_t size = dim * elemSize; 1101 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 1102 #ifdef __STATISTICS__ 1103 __atomic_add_fetch( &stats.cmemalign_0_calls, 1, __ATOMIC_SEQ_CST ); 1104 #endif // __STATISTICS__ 1105 return 0p; 1106 } // if 1107 #ifdef __STATISTICS__ 1108 __atomic_add_fetch( &stats.cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1109 __atomic_add_fetch( &stats.cmemalign_storage_request, dim * elemSize, __ATOMIC_SEQ_CST ); 1110 #endif // __STATISTICS__ 1111 1112 char * addr = (char *)memalignNoStats( alignment, size ); 1433 char * addr = (char *)memalignNoStats( alignment, size STAT_ARG( CMEMALIGN ) ); 1434 1435 if ( unlikely( addr == NULL ) ) return NULL; // stop further processing if 0p is returned 1113 1436 1114 1437 Heap.Storage.Header * header; 1115 Heap.FreeHeader * free Elem;1438 Heap.FreeHeader * freeHead; 1116 1439 size_t bsize; 1117 1440 … … 1119 1442 bool mapped = 1120 1443 #endif // __CFA_DEBUG__ 1121 headers( "cmemalign", addr, header, free Elem, bsize, alignment );1444 headers( "cmemalign", addr, header, freeHead, bsize, alignment ); 1122 1445 1123 1446 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. … … 1169 1492 // 0p, no operation is performed. 
1170 1493 void free( void * addr ) libcfa_public { 1494 // verify( heapManager ); 1495 1171 1496 if ( unlikely( addr == 0p ) ) { // special case 1172 1497 #ifdef __STATISTICS__ 1173 __atomic_add_fetch( &stats.free_null_calls, 1, __ATOMIC_SEQ_CST ); 1498 if ( heapManager ) 1499 incZeroCalls( FREE ); 1174 1500 #endif // __STATISTICS__ 1175 1176 // #ifdef __CFA_DEBUG__1177 // if ( traceHeap() ) {1178 // #define nullmsg "Free( 0x0 ) size:0\n"1179 // // Do not debug print free( 0p ), as it can cause recursive entry from sprintf.1180 // __cfaabi_dbg_write( nullmsg, sizeof(nullmsg) - 1 );1181 // } // if1182 // #endif // __CFA_DEBUG__1183 1501 return; 1184 } // exit 1185 1186 doFree( addr ); 1502 } // if 1503 1504 #ifdef __STATISTICS__ 1505 incCalls( FREE ); 1506 #endif // __STATISTICS__ 1507 1508 doFree( addr ); // handles heapManager == nullptr 1187 1509 } // free 1188 1510 … … 1227 1549 if ( unlikely( addr == 0p ) ) return 0; // null allocation has 0 size 1228 1550 Heap.Storage.Header * header; 1229 Heap.FreeHeader * free Elem;1551 Heap.FreeHeader * freeHead; 1230 1552 size_t bsize, alignment; 1231 1553 1232 headers( "malloc_usable_size", addr, header, free Elem, bsize, alignment );1554 headers( "malloc_usable_size", addr, header, freeHead, bsize, alignment ); 1233 1555 return DataStorage( bsize, addr, header ); // data storage in bucket 1234 1556 } // malloc_usable_size … … 1238 1560 void malloc_stats( void ) libcfa_public { 1239 1561 #ifdef __STATISTICS__ 1240 printStats(); 1241 if ( prtFree() ) prtFree( heapManager ); 1562 HeapStatistics stats; 1563 HeapStatisticsCtor( stats ); 1564 if ( printStats( collectStats( stats ) ) == -1 ) { 1565 #else 1566 #define MALLOC_STATS_MSG "malloc_stats statistics disabled.\n" 1567 if ( write( STDERR_FILENO, MALLOC_STATS_MSG, sizeof( MALLOC_STATS_MSG ) - 1 /* size includes '\0' */ ) == -1 ) { 1242 1568 #endif // __STATISTICS__ 1569 abort( "**** Error **** write failed in malloc_stats" ); 1570 } // if 1243 1571 } // malloc_stats 1244 1572 … … 1247 1575 int malloc_stats_fd( int fd __attribute__(( unused )) ) libcfa_public { 1248 1576 #ifdef __STATISTICS__ 1249 int temp = stats_fd;1250 stats_fd = fd;1577 int temp = heapMaster.stats_fd; 1578 heapMaster.stats_fd = fd; 1251 1579 return temp; 1252 1580 #else … … 1262 1590 if ( options != 0 ) { errno = EINVAL; return -1; } 1263 1591 #ifdef __STATISTICS__ 1264 return printStatsXML( stream ); 1592 HeapStatistics stats; 1593 HeapStatisticsCtor( stats ); 1594 return printStatsXML( collectStats( stats ), stream ); // returns bytes written or -1 1265 1595 #else 1266 1596 return 0; // unsupported … … 1275 1605 choose( option ) { 1276 1606 case M_TOP_PAD: 1277 heap Expand = ceiling2( value, __page_size );1607 heapMaster.heapExpand = ceiling2( value, __page_size ); 1278 1608 return 1; 1279 1609 case M_MMAP_THRESHOLD: … … 1319 1649 // Must have CFA linkage to overload with C linkage realloc. 1320 1650 void * resize( void * oaddr, size_t nalign, size_t size ) libcfa_public { 1321 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 
1322 if ( unlikely( size == 0 ) ) { // special cases 1323 #ifdef __STATISTICS__ 1324 __atomic_add_fetch( &stats.resize_0_calls, 1, __ATOMIC_SEQ_CST ); 1325 #endif // __STATISTICS__ 1326 free( oaddr ); 1327 return 0p; 1651 if ( unlikely( oaddr == 0p ) ) { // => malloc( size ) 1652 return memalignNoStats( nalign, size STAT_ARG( RESIZE ) ); 1328 1653 } // if 1329 1654 1330 if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum 1331 #ifdef __CFA_DEBUG__ 1332 else checkAlign( nalign ); // check alignment 1333 #endif // __CFA_DEBUG__ 1334 1335 if ( unlikely( oaddr == 0p ) ) { 1336 #ifdef __STATISTICS__ 1337 __atomic_add_fetch( &stats.resize_calls, 1, __ATOMIC_SEQ_CST ); 1338 __atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST ); 1339 #endif // __STATISTICS__ 1340 return memalignNoStats( nalign, size ); 1341 } // if 1655 PROLOG( RESIZE, doFree( oaddr ) ); // => free( oaddr ) 1342 1656 1343 1657 // Attempt to reuse existing alignment. … … 1347 1661 1348 1662 if ( unlikely( isFakeHeader ) ) { 1663 checkAlign( nalign ); // check alignment 1349 1664 oalign = ClearAlignmentBit( header ); // old alignment 1350 1665 if ( unlikely( (uintptr_t)oaddr % nalign == 0 // lucky match ? … … 1353 1668 ) ) { 1354 1669 HeaderAddr( oaddr )->kind.fake.alignment = MarkAlignmentBit( nalign ); // update alignment (could be the same) 1355 Heap.FreeHeader * free Elem;1670 Heap.FreeHeader * freeHead; 1356 1671 size_t bsize, oalign; 1357 headers( "resize", oaddr, header, free Elem, bsize, oalign );1672 headers( "resize", oaddr, header, freeHead, bsize, oalign ); 1358 1673 size_t odsize = DataStorage( bsize, oaddr, header ); // data storage available in bucket 1359 1674 … … 1361 1676 HeaderAddr( oaddr )->kind.fake.alignment = MarkAlignmentBit( nalign ); // update alignment (could be the same) 1362 1677 ClearZeroFillBit( header ); // turn off 0 fill 1678 #ifdef __CFA_DEBUG__ 1679 incUnfreed( size - header->kind.real.size ); // adjustment off the size difference 1680 #endif // __CFA_DEBUG__ 1363 1681 header->kind.real.size = size; // reset allocation size 1682 #ifdef __STATISTICS__ 1683 incCalls( RESIZE ); 1684 #endif // __STATISTICS__ 1364 1685 return oaddr; 1365 1686 } // if … … 1370 1691 } // if 1371 1692 1372 #ifdef __STATISTICS__1373 __atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST );1374 #endif // __STATISTICS__1375 1376 1693 // change size, DO NOT preserve STICKY PROPERTIES. 1377 free( oaddr );1378 return memalignNoStats( nalign, size );// create new aligned area1694 doFree( oaddr ); // free previous storage 1695 return memalignNoStats( nalign, size STAT_ARG( RESIZE ) ); // create new aligned area 1379 1696 } // resize 1380 1697 1381 1698 1382 1699 void * realloc( void * oaddr, size_t nalign, size_t size ) libcfa_public { 1383 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 
1384 if ( unlikely( size == 0 ) ) { // special cases 1385 #ifdef __STATISTICS__ 1386 __atomic_add_fetch( &stats.realloc_0_calls, 1, __ATOMIC_SEQ_CST ); 1387 #endif // __STATISTICS__ 1388 free( oaddr ); 1389 return 0p; 1700 if ( unlikely( oaddr == 0p ) ) { // => malloc( size ) 1701 return memalignNoStats( nalign, size STAT_ARG( REALLOC ) ); 1390 1702 } // if 1391 1703 1392 if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum 1393 #ifdef __CFA_DEBUG__ 1394 else checkAlign( nalign ); // check alignment 1395 #endif // __CFA_DEBUG__ 1396 1397 if ( unlikely( oaddr == 0p ) ) { 1398 #ifdef __STATISTICS__ 1399 __atomic_add_fetch( &stats.realloc_calls, 1, __ATOMIC_SEQ_CST ); 1400 __atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST ); 1401 #endif // __STATISTICS__ 1402 return memalignNoStats( nalign, size ); 1403 } // if 1704 PROLOG( REALLOC, doFree( oaddr ) ); // => free( oaddr ) 1404 1705 1405 1706 // Attempt to reuse existing alignment. … … 1408 1709 size_t oalign; 1409 1710 if ( unlikely( isFakeHeader ) ) { 1711 checkAlign( nalign ); // check alignment 1410 1712 oalign = ClearAlignmentBit( header ); // old alignment 1411 1713 if ( unlikely( (uintptr_t)oaddr % nalign == 0 // lucky match ? … … 1421 1723 } // if 1422 1724 1423 #ifdef __STATISTICS__ 1424 __atomic_add_fetch( &stats.realloc_calls, 1, __ATOMIC_SEQ_CST ); 1425 __atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST ); 1426 #endif // __STATISTICS__ 1427 1428 Heap.FreeHeader * freeElem; 1725 Heap.FreeHeader * freeHead; 1429 1726 size_t bsize; 1430 headers( "realloc", oaddr, header, free Elem, bsize, oalign );1727 headers( "realloc", oaddr, header, freeHead, bsize, oalign ); 1431 1728 1432 1729 // change size and copy old content to new storage … … 1435 1732 bool ozfill = ZeroFillBit( header ); // old allocation zero filled 1436 1733 1437 void * naddr = memalignNoStats( nalign, size );// create new aligned area1438 1439 headers( "realloc", naddr, header, free Elem, bsize, oalign );1734 void * naddr = memalignNoStats( nalign, size STAT_ARG( REALLOC ) ); // create new aligned area 1735 1736 headers( "realloc", naddr, header, freeHead, bsize, oalign ); 1440 1737 memcpy( naddr, oaddr, min( osize, size ) ); // copy bytes 1441 free( oaddr );1738 doFree( oaddr ); // free previous storage 1442 1739 1443 1740 if ( unlikely( ozfill ) ) { // previous request zero fill ? … … 1451 1748 1452 1749 1750 void * reallocarray( void * oaddr, size_t nalign, size_t dim, size_t elemSize ) __THROW { 1751 return realloc( oaddr, nalign, dim * elemSize ); 1752 } // reallocarray 1753 1754 1453 1755 // Local Variables: // 1454 1756 // tab-width: 4 // -
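The small-allocation path in doMalloc above maps a request size to a bucket with a direct-lookup table (FASTLOOKUP) for tiny sizes and the lower-bound binary search Bsearchl otherwise. A minimal plain-C sketch of that lookup, with invented bucket sizes, behaves the same way:

#include <assert.h>
#include <stdio.h>

static const unsigned int bucketSizes[] = { 16, 32, 48, 64, 96, 128, 192, 256, 512, 1024 };
enum { NoBucketSizes = sizeof(bucketSizes) / sizeof(bucketSizes[0]) };

// index of the first bucket whose block size is >= key (lower bound)
static size_t Bsearchl( unsigned int key, const unsigned int vals[], size_t dim ) {
    size_t l = 0, h = dim;
    while ( l < h ) {
        size_t m = (l + h) / 2;
        if ( vals[m] < key ) l = m + 1;
        else h = m;
    }
    return l;
}

int main( void ) {
    for ( unsigned int request = 1; request <= 1024; request += 100 ) {
        size_t bucket = Bsearchl( request, bucketSizes, NoBucketSizes );
        assert( bucket < NoBucketSizes && request <= bucketSizes[bucket] ); // subscript/search check
        printf( "request %4u -> bucket %zu (block size %u)\n",
                request, bucket, bucketSizes[bucket] );
    }
    return 0;
}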
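Requests at or above the sbrk/mmap crossover take the large-object path instead: the total size is rounded up to whole pages, obtained with a private anonymous mmap, and later released with munmap of the recorded length. A stand-alone plain-C sketch of just that round-trip (the request size here is arbitrary):

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main( void ) {
    size_t page = (size_t)sysconf( _SC_PAGESIZE );
    size_t request = 300 * 1024;                         /* a "large" request */
    size_t tsize = (request + page - 1) / page * page;   /* ceiling to page size */

    void * block = mmap( 0, tsize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 );
    if ( block == MAP_FAILED ) { perror( "mmap" ); return 1; }

    printf( "mapped %zu bytes (rounded from %zu) at %p\n", tsize, request, block );

    /* the allocator records tsize in the block header so the free path knows
       the munmap length; here it simply stays in a local */
    if ( munmap( block, tsize ) == -1 ) { perror( "munmap" ); return 1; }
    return 0;
}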
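Under OWNERSHIP, a block freed by a thread that does not own its heap is pushed onto the bucket's returnList, either under the RETURNSPIN lock or lock-free; the owning thread later claims the whole list with a single atomic exchange. A simplified plain-C sketch of the lock-free variant, using stand-in node types rather than the real Heap.Storage layout:

#include <stddef.h>

typedef struct Node {
    struct Node * next;
} Node;

typedef struct FreeHeader {
    Node * returnList;                       /* written by non-owning threads */
    Node * freeList;                         /* touched only by the home thread */
} FreeHeader;

/* called by a thread that does NOT own this bucket (cf. doFree's else branch) */
static void return_push( FreeHeader * head, Node * node ) {
    node->next = head->returnList;           /* link new node to the observed top */
    /* on failure the CAS reloads node->next with the current top, so just retry */
    while ( ! __atomic_compare_exchange_n( &head->returnList, &node->next, node,
                                           false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) );
}

/* called by the owning thread when its private freeList is empty (cf. doMalloc) */
static Node * return_pull( FreeHeader * head ) {
    /* claim the entire pushed list at once, leaving an empty list behind */
    return __atomic_exchange_n( &head->returnList, NULL, __ATOMIC_SEQ_CST );
}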
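In debug builds the allocator scrubs newly carved and newly freed storage with SCRUB (0xff) so that reads of uninitialized or freed memory are likely to fault, and it bounds the cost by touching only the first and last SCRUB_SIZE bytes of large blocks. The same bounded scrub, as a plain-C sketch:

#include <string.h>

#define SCRUB_SIZE 1024lu
#define SCRUB      '\xff'       /* 0xfe is avoided, per the comment above */

static size_t min_sz( size_t a, size_t b ) { return a < b ? a : b; }

static void scrub_new( char * data, size_t dsize ) {     /* cf. doMalloc */
    memset( data, SCRUB, min_sz( SCRUB_SIZE, dsize ) );   /* first bytes only */
}

static void scrub_freed( char * data, size_t dsize ) {   /* cf. doFree */
    if ( dsize <= SCRUB_SIZE * 2 ) {
        memset( data, SCRUB, dsize );                     /* scrub all */
    } else {
        memset( data, SCRUB, SCRUB_SIZE );                /* scrub front */
        memset( data + dsize - SCRUB_SIZE, SCRUB, SCRUB_SIZE ); /* scrub back */
    }
}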
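memalignNoStats over-allocates by alignment - libAlign() + sizeof(Heap.Storage), picks the first suitably aligned address inside the block, and plants a fake header just before it whose offset field leads back to the real header (the low bit of the stored alignment marks it as fake). A plain-C sketch of only the address arithmetic; HEADER_SIZE and LIB_ALIGN are illustrative stand-ins, and malloc is assumed to hand back 16-byte-aligned storage:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define LIB_ALIGN   16u                          /* stand-in for libAlign() */
#define HEADER_SIZE 16u                          /* stand-in for sizeof(Heap.Storage) */

static uintptr_t ceiling2( uintptr_t n, uintptr_t align ) {
    return (n + align - 1) & ~(align - 1);       /* align must be a power of 2 */
}

int main( void ) {
    size_t size = 100, alignment = 64;

    /* block layout: [ real header | data ... ]; over-allocate so an aligned
       address with room for a fake header in front of it must exist */
    size_t extra = alignment - LIB_ALIGN + HEADER_SIZE;
    char * block = malloc( HEADER_SIZE + size + extra );
    if ( block == NULL ) return 1;
    char * data = block + HEADER_SIZE;           /* what doMalloc would hand back */

    /* first alignment boundary leaving HEADER_SIZE bytes free in front of it */
    char * user = (char *)ceiling2( (uintptr_t)data + HEADER_SIZE, alignment );

    char * realHeader = data - HEADER_SIZE;      /* == block */
    char * fakeHeader = user - HEADER_SIZE;      /* header the user address "sees" */
    ptrdiff_t offset  = fakeHeader - realHeader; /* stored so free() can walk back */

    printf( "user %p aligned=%d, fake->real header offset %td bytes\n",
            (void *)user, (int)((uintptr_t)user % alignment == 0), offset );
    free( block );
    return 0;
}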
libcfa/src/heap.hfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Tue May 26 11:23:55 2020 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : T hu Apr 21 22:52:25 202213 // Update Count : 2 112 // Last Modified On : Tue Oct 4 19:08:55 2022 13 // Update Count : 23 14 14 // 15 15 … … 30 30 bool checkFreeOff(); 31 31 32 // supported mallopt options33 #ifndef M_MMAP_THRESHOLD34 #define M_MMAP_THRESHOLD (-1)35 #endif // M_MMAP_THRESHOLD36 37 #ifndef M_TOP_PAD38 #define M_TOP_PAD (-2)39 #endif // M_TOP_PAD40 41 32 extern "C" { 42 33 // New allocation operations. … … 49 40 size_t malloc_size( void * addr ); 50 41 int malloc_stats_fd( int fd ); 51 size_t malloc_usable_size( void * addr );52 42 size_t malloc_expansion(); // heap expansion size (bytes) 53 43 size_t malloc_mmap_start(); // crossover allocation size from sbrk to mmap -
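The statistics entry points declared here keep the familiar glibc shapes (malloc_stats, malloc_info, mallopt), with malloc_stats_fd as the CFA-specific addition for redirecting output. A hedged plain-C usage sketch, assuming the program is linked against this allocator so the implementations above are the ones that run:

#include <malloc.h>     /* malloc_stats, malloc_info, mallopt, M_TOP_PAD */
#include <stdio.h>
#include <stdlib.h>

int main( void ) {
    void * p = malloc( 4096 );
    void * q = calloc( 10, 128 );

    malloc_stats();                      /* human-readable totals, cf. printStats */
    malloc_info( 0, stdout );            /* XML form, cf. printStatsXML; options must be 0 */
    mallopt( M_TOP_PAD, 1024 * 1024 );   /* request a larger sbrk expansion step */

    free( p );
    free( q );
    return 0;
}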
libcfa/src/math.hfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Mon Apr 18 23:37:04 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Apr 15 11:47:56 202113 // Update Count : 13 212 // Last Modified On : Sat Oct 8 08:40:42 2022 13 // Update Count : 136 14 14 // 15 15 … … 22 22 23 23 #include "common.hfa" 24 #include "bits/debug.hfa" 24 25 25 26 //---------------------- General ---------------------- 26 27 27 static inline {28 static inline __attribute__((always_inline)) { 28 29 float ?%?( float x, float y ) { return fmodf( x, y ); } 29 30 float fmod( float x, float y ) { return fmodf( x, y ); } … … 63 64 //---------------------- Exponential ---------------------- 64 65 65 static inline {66 static inline __attribute__((always_inline)) { 66 67 float exp( float x ) { return expf( x ); } 67 68 // extern "C" { double exp( double ); } … … 92 93 //---------------------- Logarithm ---------------------- 93 94 94 static inline {95 static inline __attribute__((always_inline)) { 95 96 float log( float x ) { return logf( x ); } 96 97 // extern "C" { double log( double ); } … … 147 148 } // distribution 148 149 150 static inline unsigned long long log2_u32_32( unsigned long long val ) { 151 enum { 152 TABLE_BITS = 6, 153 TABLE_SIZE = (1 << TABLE_BITS) + 2, 154 }; 155 // for(i; TABLE_SIZE) { 156 // table[i] = (unsigned long long)(log2(1.0 + i / pow(2, TABLE_BITS)) * pow(2, 32))); 157 // } 158 static const unsigned long long table[] = { 159 0x0000000000, 0x0005b9e5a1, 0x000b5d69ba, 0x0010eb389f, 160 0x001663f6fa, 0x001bc84240, 0x002118b119, 0x002655d3c4, 161 0x002b803473, 0x00309857a0, 0x00359ebc5b, 0x003a93dc98, 162 0x003f782d72, 0x00444c1f6b, 0x0049101eac, 0x004dc4933a, 163 0x005269e12f, 0x00570068e7, 0x005b888736, 0x006002958c, 164 0x00646eea24, 0x0068cdd829, 0x006d1fafdc, 0x007164beb4, 165 0x00759d4f80, 0x0079c9aa87, 0x007dea15a3, 0x0081fed45c, 166 0x0086082806, 0x008a064fd5, 0x008df988f4, 0x0091e20ea1, 167 0x0095c01a39, 0x009993e355, 0x009d5d9fd5, 0x00a11d83f4, 168 0x00a4d3c25e, 0x00a8808c38, 0x00ac241134, 0x00afbe7fa0, 169 0x00b3500472, 0x00b6d8cb53, 0x00ba58feb2, 0x00bdd0c7c9, 170 0x00c1404ead, 0x00c4a7ba58, 0x00c80730b0, 0x00cb5ed695, 171 0x00ceaecfea, 0x00d1f73f9c, 0x00d53847ac, 0x00d8720935, 172 0x00dba4a47a, 0x00ded038e6, 0x00e1f4e517, 0x00e512c6e5, 173 0x00e829fb69, 0x00eb3a9f01, 0x00ee44cd59, 0x00f148a170, 174 0x00f446359b, 0x00f73da38d, 0x00fa2f045e, 0x00fd1a708b, 175 0x0100000000, 0x0102dfca16, 176 }; 177 _Static_assert((sizeof(table) / sizeof(table[0])) == TABLE_SIZE, "TABLE_SIZE should be accurate"); 178 // starting from val = (2 ** i)*(1 + f) where 0 <= f < 1 179 // log identities mean log2(val) = log2((2 ** i)*(1 + f)) = log2(2**i) + log2(1+f) 180 // 181 // getting i is easy to do using builtin_clz (count leading zero) 182 // 183 // we want to calculate log2(1+f) independently to have a many bits of precision as possible. 
184 // val = (2 ** i)*(1 + f) = 2 ** i + f * 2 ** i 185 // isolating f we get 186 // val - 2 ** i = f * 2 ** i 187 // (val - 2 ** i) / 2 ** i = f 188 // 189 // we want to interpolate from the table to get the values 190 // and compromise by doing quadratic interpolation (rather than higher degree interpolation) 191 // 192 // for the interpolation we want to shift everything the fist sample point 193 // so our parabola becomes x = 0 194 // this further simplifies the equations 195 // 196 // the consequence is that we need f in 2 forms: 197 // - finding the index of x0 198 // - finding the distance between f and x0 199 // 200 // since sample points are equidistant we can significantly simplify the equations 201 202 // get i 203 const unsigned long long bits = sizeof(val) * __CHAR_BIT__; 204 const unsigned long long lz = __builtin_clzl(val); 205 const unsigned long long i = bits - 1 - lz; 206 207 // get the fractinal part as a u32.32 208 const unsigned long long frac = (val << (lz + 1)) >> 32; 209 210 // get high order bits for the index into the table 211 const unsigned long long idx0 = frac >> (32 - TABLE_BITS); 212 213 // get the x offset, i.e., the difference between the first sample point and the actual fractional part 214 const long long udx = frac - (idx0 << (32 - TABLE_BITS)); 215 /* paranoid */ verify((idx0 + 2) < TABLE_SIZE); 216 217 const long long y0 = table[idx0 + 0]; 218 const long long y1 = table[idx0 + 1]; 219 const long long y2 = table[idx0 + 2]; 220 221 // from there we can quadraticly interpolate to get the data, using the lagrange polynomial 222 // normally it would look like: 223 // double r0 = y0 * ((x - x1) / (x0 - x1)) * ((x - x2) / (x0 - x2)); 224 // double r1 = y1 * ((x - x0) / (x1 - x0)) * ((x - x2) / (x1 - x2)); 225 // double r2 = y2 * ((x - x0) / (x2 - x0)) * ((x - x1) / (x2 - x1)); 226 // but since the spacing between sample points is fixed, we can simplify it and extract common expressions 227 const long long f1 = (y1 - y0); 228 const long long f2 = (y2 - y0); 229 const long long a = f2 - (f1 * 2l); 230 const long long b = (f1 * 2l) - a; 231 232 // Now we can compute it in the form (ax + b)x + c (which avoid repeating steps) 233 long long sum = ((a*udx) >> (32 - TABLE_BITS)) + b; 234 sum = (sum*udx) >> (32 - TABLE_BITS + 1); 235 sum = y0 + sum; 236 237 return (i << 32) + (sum); 238 } 239 149 240 //---------------------- Trigonometric ---------------------- 150 241 151 static inline {242 static inline __attribute__((always_inline)) { 152 243 float sin( float x ) { return sinf( x ); } 153 244 // extern "C" { double sin( double ); } … … 204 295 //---------------------- Hyperbolic ---------------------- 205 296 206 static inline {297 static inline __attribute__((always_inline)) { 207 298 float sinh( float x ) { return sinhf( x ); } 208 299 // extern "C" { double sinh( double ); } … … 250 341 //---------------------- Error / Gamma ---------------------- 251 342 252 static inline {343 static inline __attribute__((always_inline)) { 253 344 float erf( float x ) { return erff( x ); } 254 345 // extern "C" { double erf( double ); } … … 279 370 //---------------------- Nearest Integer ---------------------- 280 371 281 static inline{372 inline __attribute__((always_inline)) static { 282 373 signed char floor( signed char n, signed char align ) { return n / align * align; } 283 374 unsigned char floor( unsigned char n, unsigned char align ) { return n / align * align; } … … 307 398 // forall( T | { T ?+?( T, T ); T ?-?( T, T ); T ?%?( T, T ); } ) 308 399 // T ceiling_div( T 
n, T align ) { verify( is_pow2( align ) );return (n + (align - 1)) / align; } 309 400 310 401 // gcc notices the div/mod pair and saves both so only one div. 311 402 signed char ceiling( signed char n, signed char align ) { return floor( n + (n % align != 0 ? align - 1 : 0), align ); } … … 376 467 //---------------------- Manipulation ---------------------- 377 468 378 static inline {469 static inline __attribute__((always_inline)) { 379 470 float copysign( float x, float y ) { return copysignf( x, y ); } 380 471 // extern "C" { double copysign( double, double ); } … … 418 509 //--------------------------------------- 419 510 420 static inline {511 static inline __attribute__((always_inline)) { 421 512 forall( T | { void ?{}( T &, one_t ); T ?+?( T, T ); T ?-?( T, T );T ?*?( T, T ); } ) 422 513 T lerp( T x, T y, T a ) { return x * ((T){1} - a) + y * a; } -
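The lookup-table comments above isolate the fractional part f and then collapse the Lagrange form into the nested (a*x + b)*x + c shape once the sample points are equidistant. A minimal floating-point sketch of that same simplification (illustrative only, not the fixed-point library routine; all names are hypothetical):

// Quadratic (Lagrange) interpolation over three equidistant samples y0, y1, y2
// taken at offsets 0, h, 2*h from the first sample point, evaluated at dx in [0, h).
// With equidistant points the three Lagrange terms collapse to y0 + ((a*t + b)*t)/2,
// the same nested form used above, here in floating point instead of u32.32.
double quad_interp( double y0, double y1, double y2, double dx, double h ) {
    double t  = dx / h;        // normalized offset from the first sample point
    double f1 = y1 - y0;       // first differences, as in the fixed-point code
    double f2 = y2 - y0;
    double a  = f2 - 2.0 * f1; // curvature term
    double b  = 2.0 * f1 - a;  // == 4*f1 - f2
    return y0 + 0.5 * ( a * t + b ) * t;
}

In the fixed-point version the shift by (32 - TABLE_BITS) plays the role of dividing by the sample spacing h, and the extra shift by one supplies the final division by two.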
libcfa/src/parseargs.cfa
r9cd5bd2 rdf6cc9d 1 // 2 // Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo 3 // 4 // The contents of this file are covered under the licence agreement in the 5 // file "LICENCE" distributed with Cforall. 6 // 7 // parseargs.cfa 8 // implementation of arguments parsing (argc, argv) 9 // 10 // Author : Thierry Delisle 11 // Created On : Wed Oct 12 15:28:01 2022 12 // Last Modified By : 13 // Last Modified On : 14 // Update Count : 15 // 16 1 17 #include "parseargs.hfa" 2 18 19 #include <assert.h> 20 #include <ctype.h> 3 21 #include <stdint.h> 4 22 #include <string.h> 5 23 #include <errno.h> 6 24 #include <unistd.h> 7 #include <assert.h>8 25 9 26 extern "C" { … … 33 50 extern char ** cfa_args_envp __attribute__((weak)); 34 51 35 static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * usage, FILE * out) __attribute__ ((noreturn)); 52 forall([N]) 53 static void usage(char * cmd, const array( cfa_option, N ) & options, const char * usage, FILE * out) __attribute__ ((noreturn)); 36 54 //----------------------------------------------------------------------------- 37 55 // checking 38 static void check_args(cfa_option options[], size_t opt_count) { 39 for(i; opt_count) { 40 for(j; opt_count) { 56 forall([N]) 57 static void check_args( const array( cfa_option, N ) & options ) { 58 for(i; N) { 59 for(j; N) { 41 60 if(i == j) continue; 42 61 … … 53 72 //----------------------------------------------------------------------------- 54 73 // Parsing args 55 void parse_args( cfa_option options[], size_t opt_count, const char * usage, char ** & left ) { 56 if( 0p != &cfa_args_argc ) { 57 parse_args(cfa_args_argc, cfa_args_argv, options, opt_count, usage, left ); 58 } 59 else { 60 char * temp = ""; 61 parse_args(0, &temp, options, opt_count, usage, left ); 62 } 63 } 64 65 void parse_args( 66 int argc, 67 char * argv[], 68 cfa_option options[], 69 size_t opt_count, 70 const char * usage, 71 char ** & left 72 ) { 73 check_args(options, opt_count); 74 75 int maxv = 'h'; 76 assert( opt_count > 0 ); 77 char optstring[opt_count * 3] = { '\0' }; 78 { 79 int idx = 0; 80 for(i; opt_count) { 81 if (options[i].short_name) { 82 maxv = max(options[i].short_name, maxv); 83 optstring[idx] = options[i].short_name; 84 idx++; 85 if( ((intptr_t)options[i].parse) != ((intptr_t)parse_settrue) 86 && ((intptr_t)options[i].parse) != ((intptr_t)parse_setfalse) ) { 87 optstring[idx] = ':'; 74 forall([opt_count]) { 75 void parse_args( const array( cfa_option, opt_count ) & options, const char * usage, char ** & left ) { 76 if( 0p != &cfa_args_argc ) { 77 parse_args(cfa_args_argc, cfa_args_argv, options, usage, left ); 78 } 79 else { 80 char * temp = ""; 81 parse_args(0, &temp, options, usage, left ); 82 } 83 } 84 85 void parse_args( 86 int argc, 87 char * argv[], 88 const array( cfa_option, opt_count ) & options, 89 const char * usage, 90 char ** & left 91 ) { 92 check_args(options); 93 94 int maxv = 'h'; 95 char optstring[(opt_count * 3) + 2] = { '\0' }; 96 { 97 int idx = 0; 98 for(i; opt_count) { 99 if (options[i].short_name) { 100 maxv = max(options[i].short_name, maxv); 101 optstring[idx] = options[i].short_name; 102 idx++; 103 if( ((intptr_t)options[i].parse) != ((intptr_t)parse_settrue) 104 && ((intptr_t)options[i].parse) != ((intptr_t)parse_setfalse) ) { 105 optstring[idx] = ':'; 106 idx++; 107 } 108 } 109 } 110 optstring[idx+0] = 'h'; 111 optstring[idx+1] = '\0'; 112 } 113 114 struct option optarr[opt_count + 2]; 115 { 116 int idx = 0; 117 for(i; opt_count) { 118 
if(options[i].long_name) { 119 // we don't have the mutable keyword here, which is really what we would want 120 int & val_ref = (int &)(const int &)options[i].val; 121 val_ref = (options[i].short_name != '\0') ? ((int)options[i].short_name) : ++maxv; 122 123 optarr[idx].name = options[i].long_name; 124 optarr[idx].flag = 0p; 125 optarr[idx].val = options[i].val; 126 if( ((intptr_t)options[i].parse) == ((intptr_t)parse_settrue) 127 || ((intptr_t)options[i].parse) == ((intptr_t)parse_setfalse) ) { 128 optarr[idx].has_arg = no_argument; 129 } else { 130 optarr[idx].has_arg = required_argument; 131 } 88 132 idx++; 89 133 } 90 134 } 91 } 92 optstring[idx+0] = 'h'; 93 optstring[idx+1] = '\0'; 94 } 95 96 struct option optarr[opt_count + 2]; 97 { 98 int idx = 0; 99 for(i; opt_count) { 100 if(options[i].long_name) { 101 options[i].val = (options[i].short_name != '\0') ? ((int)options[i].short_name) : ++maxv; 102 optarr[idx].name = options[i].long_name; 103 optarr[idx].flag = 0p; 104 optarr[idx].val = options[i].val; 105 if( ((intptr_t)options[i].parse) == ((intptr_t)parse_settrue) 106 || ((intptr_t)options[i].parse) == ((intptr_t)parse_setfalse) ) { 107 optarr[idx].has_arg = no_argument; 108 } else { 109 optarr[idx].has_arg = required_argument; 110 } 111 idx++; 135 optarr[idx+0].[name, has_arg, flag, val] = ["help", no_argument, 0, 'h']; 136 optarr[idx+1].[name, has_arg, flag, val] = [0, no_argument, 0, 0]; 137 } 138 139 FILE * out = stderr; 140 NEXT_ARG: 141 for() { 142 int idx = 0; 143 int opt = getopt_long(argc, argv, optstring, optarr, &idx); 144 switch(opt) { 145 case -1: 146 if(&left != 0p) left = argv + optind; 147 return; 148 case 'h': 149 out = stdout; 150 case '?': 151 usage(argv[0], options, usage, out); 152 default: 153 for(i; opt_count) { 154 if(opt == options[i].val) { 155 const char * arg = optarg ? optarg : ""; 156 if( arg[0] == '=' ) { arg++; } 157 // work around for some weird bug 158 void * variable = options[i].variable; 159 bool (*parse_func)(const char *, void * ) = options[i].parse; 160 bool success = parse_func( arg, variable ); 161 if(success) continue NEXT_ARG; 162 163 fprintf(out, "Argument '%s' for option %c could not be parsed\n\n", arg, (char)opt); 164 usage(argv[0], options, usage, out); 165 } 166 } 167 abort("Internal parse arg error\n"); 112 168 } 113 } 114 optarr[idx+0].[name, has_arg, flag, val] = ["help", no_argument, 0, 'h']; 115 optarr[idx+1].[name, has_arg, flag, val] = [0, no_argument, 0, 0]; 116 } 117 118 FILE * out = stderr; 119 NEXT_ARG: 120 for() { 121 int idx = 0; 122 int opt = getopt_long(argc, argv, optstring, optarr, &idx); 123 switch(opt) { 124 case -1: 125 if(&left != 0p) left = argv + optind; 126 return; 127 case 'h': 128 out = stdout; 129 case '?': 130 usage(argv[0], options, opt_count, usage, out); 131 default: 132 for(i; opt_count) { 133 if(opt == options[i].val) { 134 const char * arg = optarg ? 
optarg : ""; 135 if( arg[0] == '=' ) { arg++; } 136 bool success = options[i].parse( arg, options[i].variable ); 137 if(success) continue NEXT_ARG; 138 139 fprintf(out, "Argument '%s' for option %c could not be parsed\n\n", arg, (char)opt); 140 usage(argv[0], options, opt_count, usage, out); 141 } 142 } 143 abort("Internal parse arg error\n"); 144 } 145 146 } 169 170 } 171 } 172 } 173 174 static inline int next_newline(const char * str) { 175 int ret; 176 const char * ptr = strstr(str, "\n"); 177 if(!ptr) return MAX; 178 179 /* paranoid */ verify( str <= ptr); 180 intptr_t low = (intptr_t)str; 181 intptr_t hi = (intptr_t)ptr; 182 ret = hi - low; 183 184 return ret; 147 185 } 148 186 … … 150 188 // Print usage 151 189 static void printopt(FILE * out, int width, int max, char sn, const char * ln, const char * help) { 190 // check how wide we should be printing 191 // this includes all options and the help message 152 192 int hwidth = max - (11 + width); 153 193 if(hwidth <= 0) hwidth = max; 154 194 155 char sname[4] = { ' ', ' ', ' ', '\0' }; 156 if(sn != '\0') { 157 sname[0] = '-'; 158 sname[1] = sn; 159 sname[2] = ','; 160 } 161 162 fprintf(out, " %s --%-*s %.*s\n", sname, width, ln, hwidth, help); 163 for() { 164 help += min(strlen(help), hwidth); 165 if('\0' == *help) break; 166 fprintf(out, "%*s%.*s\n", width + 11, "", hwidth, help); 167 } 195 // check which pieces we have 196 bool has_ln = ln && strcmp("", ln); 197 bool has_help = help && strcmp("", help); 198 199 // print the small name if present 200 if(sn != '\0') fprintf(out, " -%c", sn); 201 else fprintf(out, " "); 202 203 // print a comma if we have both short and long names 204 if(sn != '\0' && has_ln) fprintf(out, ", "); 205 else fprintf(out, " "); 206 207 // print the long name if present 208 if(has_ln) fprintf(out, "--%-*s", width, ln); 209 else if(has_help) fprintf(out, " %-*s", width, ""); 210 211 if(has_help) { 212 // print the help 213 // We need to wrap at the max width, and also indent newlines so everything is nice and pretty 214 215 // for each line to print 216 for() { 217 //find out if there is a newline 218 int nextnl = next_newline(help); 219 int real = min(min(strlen(help), hwidth), nextnl); 220 221 fprintf(out, " %.*s", real, help); 222 // printf("%d %d\n", real, nextnl); 223 help += real; 224 if( nextnl == real ) help++; 225 if('\0' == *help) break; 226 fprintf(out, "\n%*s", width + 8, ""); 227 } 228 } 229 fprintf(out, "\n"); 168 230 } 169 231 170 232 void print_args_usage(cfa_option options[], size_t opt_count, const char * usage, bool error) __attribute__ ((noreturn)) { 171 usage(cfa_args_argv[0], options, opt_count, usage, error ? stderr : stdout); 233 const array( cfa_option, opt_count ) & arr = (const array( cfa_option, opt_count ) &) *options; 234 usage(cfa_args_argv[0], arr, usage, error ? stderr : stdout); 172 235 } 173 236 174 237 void print_args_usage(int , char * argv[], cfa_option options[], size_t opt_count, const char * usage, bool error) __attribute__ ((noreturn)) { 175 usage(argv[0], options, opt_count, usage, error ? stderr : stdout); 176 } 177 178 static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * help, FILE * out) __attribute__((noreturn)) { 238 const array( cfa_option, opt_count ) & arr = (const array( cfa_option, opt_count ) &) *options; 239 usage(argv[0], arr, usage, error ? 
stderr : stdout); 240 } 241 242 forall( [N] ) { 243 void print_args_usage( const array(cfa_option, N ) & options, const char * usage, bool error) { 244 usage(cfa_args_argv[0], options, usage, error ? stderr : stdout); 245 } 246 247 void print_args_usage(int argc, char * argv[], const array( cfa_option, N ) & options, const char * usage, bool error) { 248 usage(argv[0], options, usage, error ? stderr : stdout); 249 } 250 } 251 252 forall([N]) 253 static void usage(char * cmd, const array( cfa_option, N ) & options, const char * help, FILE * out) __attribute__((noreturn)) { 179 254 int width = 0; 180 255 { 181 for(i; opt_count) {256 for(i; N) { 182 257 if(options[i].long_name) { 183 258 int w = strlen(options[i].long_name); … … 198 273 fprintf(out, "Usage:\n %s %s\n", cmd, help); 199 274 200 for(i; opt_count) {275 for(i; N) { 201 276 printopt(out, width, max_width, options[i].short_name, options[i].long_name, options[i].help); 202 277 } … … 272 347 bool parse(const char * arg, int & value) { 273 348 char * end; 274 int r = strtoll(arg, &end, 10); 349 350 errno = 0; 351 long long int r = strtoll(arg, &end, 0); 352 if(errno) return false; 275 353 if(*end != '\0') return false; 354 if(r > (int)MAX) return false; 355 if(r < (int)MIN) return false; 276 356 277 357 value = r; … … 279 359 } 280 360 361 static unsigned long long int strict_strtoull( const char * arg, int base) { 362 errno = 0; 363 { 364 const char * in = arg; 365 for() { 366 if('\0' == *in) { 367 errno = EINVAL; 368 return 0; 369 } 370 if(!isspace(*in)) break; 371 in++; 372 } 373 if(!isdigit(*in)) { 374 errno = EINVAL; 375 return 0; 376 } 377 } 378 379 *char end; 380 unsigned long long int r = strtoull(arg, &end, base); 381 if(*end != '\0') errno = EINVAL; 382 if(errno) return 0; 383 return r; 384 } 385 281 386 bool parse(const char * arg, unsigned & value) { 282 char * end; 283 unsigned long long int r = strtoull(arg, &end, 10); 284 if(*end != '\0') return false; 387 unsigned long long int r = strict_strtoull(arg, 0); 388 if(errno) return false; 285 389 if(r > (unsigned)MAX) return false; 286 390 … … 290 394 291 395 bool parse(const char * arg, unsigned long & value) { 292 char * end; 293 unsigned long long int r = strtoull(arg, &end, 10); 294 if(*end != '\0') return false; 396 unsigned long long int r = strict_strtoull(arg, 0); 397 if(errno) return false; 295 398 if(r > (unsigned long)MAX) return false; 296 399 … … 300 403 301 404 bool parse(const char * arg, unsigned long long & value) { 302 char * end; 303 unsigned long long int r = strtoull(arg, &end, 10); 304 if(*end != '\0') return false; 305 if(r > (unsigned long long)MAX) return false; 306 307 value = r; 308 return true; 405 unsigned long long int r = strict_strtoull(arg, 0); 406 if(errno) return false; 407 if(r > (unsigned long long)MAX) return false; 408 409 value = r; 410 return true; 309 411 } 310 412 -
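The strict_strtoull helper added above tightens strtoull, which otherwise silently accepts leading whitespace and a sign, and it funnels every failure through errno so the parse overloads can range-check the result afterwards. A stand-alone C sketch of the same idea (hypothetical names, not the library code):

#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

// Strictly parse an unsigned integer: reject empty input, leading whitespace or
// signs, trailing junk, and overflow; report success through the return value.
bool parse_ull_strict( const char * arg, int base, unsigned long long * out ) {
    if ( ! arg || ! isdigit( (unsigned char)arg[0] ) ) return false; // must start with a digit
    char * end;
    errno = 0;
    unsigned long long r = strtoull( arg, &end, base );
    if ( errno != 0 || *end != '\0' ) return false;                  // overflow or trailing characters
    *out = r;
    return true;
}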
libcfa/src/parseargs.hfa
r9cd5bd2 rdf6cc9d 1 // 2 // Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo 3 // 4 // The contents of this file are covered under the licence agreement in the 5 // file "LICENCE" distributed with Cforall. 6 // 7 // parseargs.cfa -- PUBLIC 8 // API for arguments parsing (argc, argv) 9 // 10 // Author : Thierry Delisle 11 // Created On : Wed Oct 12 15:28:01 2022 12 // Last Modified By : 13 // Last Modified On : 14 // Update Count : 15 // 1 16 #pragma once 2 17 18 #include <array.hfa> 19 3 20 struct cfa_option { 4 5 6 7 8 9 21 int val; // reserved 22 char short_name; 23 const char * long_name; 24 const char * help; 25 void * variable; 26 bool (*parse)(const char *, void * ); 10 27 }; 11 28 … … 16 33 forall(T & | { bool parse(const char *, T & ); }) 17 34 static inline void ?{}( cfa_option & this, char short_name, const char * long_name, const char * help, T & variable ) { 18 19 20 21 22 23 35 this.val = 0; 36 this.short_name = short_name; 37 this.long_name = long_name; 38 this.help = help; 39 this.variable = (void*)&variable; 40 this.parse = (bool (*)(const char *, void * ))parse; 24 41 } 25 42 26 43 forall(T &) 27 44 static inline void ?{}( cfa_option & this, char short_name, const char * long_name, const char * help, T & variable, bool (*parse)(const char *, T & )) { 28 29 30 31 32 33 45 this.val = 0; 46 this.short_name = short_name; 47 this.long_name = long_name; 48 this.help = help; 49 this.variable = (void*)&variable; 50 this.parse = (bool (*)(const char *, void * ))parse; 34 51 } 35 52 … … 37 54 void parse_args( int argc, char * argv[], cfa_option options[], size_t opt_count, const char * usage, char ** & left ); 38 55 56 forall( [N] ) { 57 void parse_args( const array( cfa_option, N ) & options, const char * usage, char ** & left ); 58 void parse_args( int argc, char * argv[], const array( cfa_option, N ) & options, const char * usage, char ** & left ); 59 } 60 39 61 void print_args_usage(cfa_option options[], size_t opt_count, const char * usage, bool error) __attribute__ ((noreturn)); 40 62 void print_args_usage(int argc, char * argv[], cfa_option options[], size_t opt_count, const char * usage, bool error) __attribute__ ((noreturn)); 63 64 forall( [N] ) { 65 void print_args_usage( const array(cfa_option, N ) & options, const char * usage, bool error) __attribute__ ((noreturn)); 66 void print_args_usage(int argc, char * argv[], const array( cfa_option, N ) & options, const char * usage, bool error) __attribute__ ((noreturn)); 67 } 41 68 42 69 bool parse_yesno (const char *, bool & ); -
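A hedged usage sketch of the array-based overloads declared above. The option names, target variables, include path, and the availability of a parse overload for int and of a parse_settrue flag parser are assumptions for illustration, not part of this changeset:

#include <parseargs.hfa>

int main( int argc, char * argv[] ) {
    int nthreads = 1;                       // hypothetical option targets
    bool verbose = false;
    cfa_option opts[] = {                   // constructors above bind variable and parser
        { 't', "threads", "number of worker threads", nthreads },
        { 'v', "verbose", "print extra information", verbose, parse_settrue },
    };
    char ** left;                           // receives the remaining positional arguments
    parse_args( argc, argv, opts, sizeof(opts) / sizeof(cfa_option), "[OPTIONS]", left );
    return 0;
}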
libcfa/src/startup.cfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Tue Jul 24 16:21:57 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 17 16:41:54202213 // Update Count : 5 512 // Last Modified On : Thu Oct 6 13:51:57 2022 13 // Update Count : 57 14 14 // 15 15 … … 24 24 25 25 extern "C" { 26 void __cfaabi_memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) )); 27 void __cfaabi_memory_startup( void ) { 28 extern void memory_startup(); 29 memory_startup(); 30 } // __cfaabi_memory_startup 31 32 void __cfaabi_memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) )); 33 void __cfaabi_memory_shutdown( void ) { 34 extern void memory_shutdown(); 35 memory_shutdown(); 36 } // __cfaabi_memory_shutdown 37 26 38 void __cfaabi_appready_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_APPREADY ) )); 27 39 void __cfaabi_appready_startup( void ) { 28 40 tzset(); // initialize time global variables 29 #ifdef __CFA_DEBUG__30 41 extern void heapAppStart(); 31 42 heapAppStart(); 32 #endif // __CFA_DEBUG__33 43 } // __cfaabi_appready_startup 34 44 35 45 void __cfaabi_appready_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_APPREADY ) )); 36 46 void __cfaabi_appready_shutdown( void ) { 37 #ifdef __CFA_DEBUG__38 47 extern void heapAppStop(); 39 48 heapAppStop(); 40 #endif // __CFA_DEBUG__41 49 } // __cfaabi_appready_shutdown 42 50 -
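The startup hooks above use GCC constructor/destructor priorities to sequence the memory phase and the application-ready phase around main. A small stand-alone illustration of that mechanism (hypothetical names and priorities, not the libcfa bootstrap code):

#include <stdio.h>

// Constructors run before main in increasing priority order; destructors run
// after main in the opposite order. Priorities up to 100 are reserved, so the
// example uses larger values.
__attribute__(( constructor( 200 ) )) static void early_setup( void )    { fprintf( stderr, "early setup\n" ); }
__attribute__(( constructor( 600 ) )) static void late_setup( void )     { fprintf( stderr, "late setup\n" ); }
__attribute__(( destructor( 600 ) ))  static void late_teardown( void )  { fprintf( stderr, "first teardown\n" ); }
__attribute__(( destructor( 200 ) ))  static void early_teardown( void ) { fprintf( stderr, "last teardown\n" ); }

int main( void ) { fprintf( stderr, "main\n" ); return 0; }
// Expected order: early setup, late setup, main, first teardown, last teardown.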
libcfa/src/stdhdr/assert.h
r9cd5bd2 rdf6cc9d 10 10 // Created On : Mon Jul 4 23:25:26 2016 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Feb 4 12:58:49 202013 // Update Count : 1 512 // Last Modified On : Sun Oct 9 21:28:22 2022 13 // Update Count : 16 14 14 // 15 15 … … 27 27 #define assertf( expr, fmt, ... ) ((expr) ? ((void)0) : __assert_fail_f(__VSTRINGIFY__(expr), __FILE__, __LINE__, __PRETTY_FUNCTION__, fmt, ## __VA_ARGS__ )) 28 28 29 void __assert_warn_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) __attribute__((format( printf, 5, 6) )); 29 30 void __assert_fail_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) __attribute__((noreturn, format( printf, 5, 6) )); 30 31 #endif 31 32 32 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__)) 33 #if ! defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__)) 34 #define __CFA_WITH_VERIFY__ 33 35 #define verify(x) assert(x) 34 36 #define verifyf(x, ...) assertf(x, __VA_ARGS__) 35 37 #define verifyfail(...) 36 #define __CFA_WITH_VERIFY__38 #define warnf( expr, fmt, ... ) ({ static bool check_once##__LINE__ = false; if( false == check_once##__LINE__ && false == (expr)) { check_once##__LINE__ = true; __assert_warn_f(__VSTRINGIFY__(expr), __FILE__, __LINE__, __PRETTY_FUNCTION__, fmt, ## __VA_ARGS__ ); } }) 37 39 #else 38 40 #define verify(x) 39 41 #define verifyf(x, ...) 40 42 #define verifyfail(...) 43 #define warnf( expr, fmt, ... ) 41 44 #endif 42 45 -
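The warnf macro added above reports a failed check at most once per call site by latching a static flag, instead of aborting like assertf. A stand-alone C sketch of the same warn-once pattern (hypothetical macro name):

#include <stdbool.h>
#include <stdio.h>

// Warn at most once per call site when expr is false; the static flag is the latch.
#define WARN_ONCE( expr, fmt, ... ) \
    do { \
        static bool warned = false; \
        if ( ! warned && ! (expr) ) { \
            warned = true; \
            fprintf( stderr, "Warning \"%s\": " fmt "\n", #expr, ## __VA_ARGS__ ); \
        } \
    } while (0)

// Example use: WARN_ONCE( backlog < 1000, "queue backlog is %d", backlog );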
libcfa/src/time.hfa
r9cd5bd2 rdf6cc9d 10 10 // Created On : Wed Mar 14 23:18:57 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Apr 21 06:32:31 202113 // Update Count : 66 712 // Last Modified On : Sat Oct 8 09:07:48 2022 13 // Update Count : 668 14 14 // 15 15 … … 27 27 //######################### Duration ######################### 28 28 29 static inline {29 static inline __attribute__((always_inline)) { 30 30 void ?{}( Duration & dur, timeval t ) with( dur ) { tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_usec * 1000; } 31 31 void ?{}( Duration & dur, timespec t ) with( dur ) { tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_nsec; } … … 115 115 //######################### C timeval ######################### 116 116 117 static inline {117 static inline __attribute__((always_inline)) { 118 118 void ?{}( timeval & t ) {} 119 119 void ?{}( timeval & t, time_t sec, suseconds_t usec ) { t.tv_sec = sec; t.tv_usec = usec; } … … 130 130 //######################### C timespec ######################### 131 131 132 static inline {132 static inline __attribute__((always_inline)) { 133 133 void ?{}( timespec & t ) {} 134 134 void ?{}( timespec & t, time_t sec, __syscall_slong_t nsec ) { t.tv_sec = sec; t.tv_nsec = nsec; } … … 145 145 //######################### C itimerval ######################### 146 146 147 static inline {147 static inline __attribute__((always_inline)) { 148 148 void ?{}( itimerval & itv, Duration alarm ) with( itv ) { 149 149 // itimerval contains durations but but uses time data-structure timeval. … … 162 162 163 163 void ?{}( Time & time, int year, int month = 1, int day = 1, int hour = 0, int min = 0, int sec = 0, int64_t nsec = 0 ); 164 static inline {164 static inline __attribute__((always_inline)) { 165 165 void ?{}( Time & time, timeval t ) with( time ) { tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_usec * 1000; } 166 166 void ?{}( Time & time, timespec t ) with( time ) { tn = (int64_t)t.tv_sec * TIMEGRAN + t.tv_nsec; } … … 194 194 195 195 char * yy_mm_dd( Time time, char * buf ); 196 static inline char * ?`ymd( Time time, char * buf ) { // short form196 static inline __attribute__((always_inline)) char * ?`ymd( Time time, char * buf ) { // short form 197 197 return yy_mm_dd( time, buf ); 198 198 } // ymd 199 199 200 200 char * mm_dd_yy( Time time, char * buf ); 201 static inline char * ?`mdy( Time time, char * buf ) { // short form201 static inline __attribute__((always_inline)) char * ?`mdy( Time time, char * buf ) { // short form 202 202 return mm_dd_yy( time, buf ); 203 203 } // mdy 204 204 205 205 char * dd_mm_yy( Time time, char * buf ); 206 static inline char * ?`dmy( Time time, char * buf ) { // short form206 static inline __attribute__((always_inline)) char * ?`dmy( Time time, char * buf ) { // short form 207 207 return dd_mm_yy( time, buf );; 208 208 } // dmy … … 212 212 //------------------------- timeval (cont) ------------------------- 213 213 214 static inline void ?{}( timeval & t, Time time ) with( t, time ) {214 static inline __attribute__((always_inline)) void ?{}( timeval & t, Time time ) with( t, time ) { 215 215 tv_sec = tn / TIMEGRAN; // seconds 216 216 tv_usec = tn % TIMEGRAN / (TIMEGRAN / 1_000_000LL); // microseconds … … 219 219 //------------------------- timespec (cont) ------------------------- 220 220 221 static inline void ?{}( timespec & t, Time time ) with( t, time ) {221 static inline __attribute__((always_inline)) void ?{}( timespec & t, Time time ) with( t, time ) { 222 222 tv_sec = tn / TIMEGRAN; // seconds 223 223 tv_nsec = tn % TIMEGRAN; // 
nanoseconds
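The Time and Duration constructors above pack everything into a single signed 64-bit nanosecond count (tn), with TIMEGRAN nanoseconds per second, and the timeval/timespec constructors split that count back apart. A plain C sketch of the two conversions (illustrative names only):

#include <stdint.h>
#include <time.h>

#define NS_PER_SEC 1000000000LL   // the role TIMEGRAN plays above

struct timespec ns_to_timespec( int64_t tn ) {
    struct timespec ts;
    ts.tv_sec  = tn / NS_PER_SEC;   // whole seconds
    ts.tv_nsec = tn % NS_PER_SEC;   // remaining nanoseconds
    return ts;
}

int64_t timespec_to_ns( struct timespec ts ) {
    return (int64_t)ts.tv_sec * NS_PER_SEC + ts.tv_nsec;
}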