Changeset 9cc3a18


Ignore:
Timestamp:
Apr 15, 2021, 5:02:04 PM (2 months ago)
Author:
Thierry Delisle <tdelisle@…>
Branches:
arm-eh, jacob/cs343-translation, master
Children:
6abcb4d
Parents:
57b3675
Message:

Major clean-up before attempting to add new scheduler

Location:
libcfa/src/concurrency
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/concurrency/invoke.h

    r57b3675 r9cc3a18  
    148148                struct $thread * prev;
    149149                volatile unsigned long long ts;
    150                 int preferred;
    151150        };
    152151
  • libcfa/src/concurrency/kernel.hfa

    r57b3675 r9cc3a18  
    140140// Cluster Tools
    141141
    142 // Intrusives lanes which are used by the relaxed ready queue
     142// Intrusives lanes which are used by the ready queue
    143143struct __attribute__((aligned(128))) __intrusive_lane_t;
    144144void  ?{}(__intrusive_lane_t & this);
    145145void ^?{}(__intrusive_lane_t & this);
     146
     147// Aligned timestamps which are used by the relaxed ready queue
     148struct __attribute__((aligned(128))) __timestamp_t;
     149void  ?{}(__timestamp_t & this);
     150void ^?{}(__timestamp_t & this);
    146151
    147152//TODO adjust cache size to ARCHITECTURE
     
    155160                // Arary of lanes
    156161                __intrusive_lane_t * volatile data;
     162
     163                // Array of times
     164                __timestamp_t * volatile tscs;
    157165
    158166                // Number of lanes (empty or not)
  • libcfa/src/concurrency/kernel_private.hfa

    r57b3675 r9cc3a18  
    286286// push thread onto a ready queue for a cluster
    287287// returns true if the list was previously empty, false otherwise
    288 __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd);
     288__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd);
    289289
    290290//-----------------------------------------------------------------------
     
    301301
    302302//-----------------------------------------------------------------------
    303 // remove thread from the ready queue of a cluster
    304 // returns bool if it wasn't found
    305 bool remove_head(struct cluster * cltr, struct $thread * thrd);
    306 
    307 //-----------------------------------------------------------------------
    308303// Increase the width of the ready queue (number of lanes) by 4
    309304void ready_queue_grow  (struct cluster * cltr);
  • libcfa/src/concurrency/ready_queue.cfa

    r57b3675 r9cc3a18  
    1919// #define USE_MPSC
    2020
     21#define USE_RELAXED_FIFO
     22// #define USE_WORK_STEALING
     23
    2124#include "bits/defs.hfa"
    2225#include "kernel_private.hfa"
     
    3841#endif
    3942
    40 #define BIAS 4
     43#if   defined(USE_RELAXED_FIFO)
     44        #define BIAS 4
     45        #define READYQ_SHARD_FACTOR 4
     46#elif defined(USE_WORK_STEALING)
     47        #define READYQ_SHARD_FACTOR 2
     48#else
     49        #error no scheduling strategy selected
     50#endif
     51
     52static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred);
     53static inline struct $thread * try_pop(struct cluster * cltr, unsigned w);
     54static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j);
     55
    4156
    4257// returns the maximum number of processors the RWLock support
     
    191206
    192207//=======================================================================
    193 // Cforall Reqdy Queue used for scheduling
     208// Cforall Ready Queue used for scheduling
    194209//=======================================================================
    195210void ?{}(__ready_queue_t & this) with (this) {
    196211        lanes.data  = 0p;
     212        lanes.tscs  = 0p;
    197213        lanes.count = 0;
    198214}
     
    201217        verify( 1 == lanes.count );
    202218        free(lanes.data);
     219        free(lanes.tscs);
    203220}
    204221
    205222//-----------------------------------------------------------------------
    206 static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
    207         unsigned i;
    208         bool local;
    209         #if defined(BIAS)
    210                 unsigned rlow  = r % BIAS;
    211                 unsigned rhigh = r / BIAS;
    212                 if((0 != rlow) && preferred >= 0) {
    213                         // (BIAS - 1) out of BIAS chances
    214                         // Use perferred queues
    215                         i = preferred + (rhigh % 4);
    216                         local = true;
    217                 }
    218                 else {
    219                         // 1 out of BIAS chances
    220                         // Use all queues
    221                         i = rhigh;
    222                         local = false;
    223                 }
    224         #else
    225                 i = r;
    226                 local = false;
    227         #endif
    228         return [i, local];
    229 }
    230 
    231 //-----------------------------------------------------------------------
    232 __attribute__((hot)) bool push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
     223__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
    233224        __cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
    234225
    235226        const bool external = (!kernelTLS().this_processor) || (cltr != kernelTLS().this_processor->cltr);
     227        /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
    236228
    237229        // write timestamp
    238230        thrd->link.ts = rdtscl();
    239231
    240         bool first = false;
    241         __attribute__((unused)) bool local;
    242         __attribute__((unused)) int preferred;
    243         #if defined(BIAS)
    244                 /* paranoid */ verify(external || kernelTLS().this_processor->cltr_id < lanes.count );
    245                 preferred =
    246                         //*
    247                         external ? -1 : kernelTLS().this_processor->cltr_id;
    248                         /*/
    249                         thrd->link.preferred * 4;
    250                         //*/
    251         #endif
     232        bool local;
     233        int preferred = external ? -1 : kernelTLS().this_processor->cltr_id;
    252234
    253235        // Try to pick a lane and lock it
     
    255237        do {
    256238                // Pick the index of a lane
    257                 // unsigned r = __tls_rand();
    258239                unsigned r = __tls_rand_fwd();
    259240                [i, local] = idx_from_r(r, preferred);
     
    304285                }
    305286        #endif
    306 
    307         // return whether or not the list was empty before this push
    308         return first;
    309 }
    310 
    311 static struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j);
    312 static struct $thread * try_pop(struct cluster * cltr, unsigned i);
     287}
    313288
    314289// Pop from the ready queue from a given cluster
    315290__attribute__((hot)) $thread * pop(struct cluster * cltr) with (cltr->ready_queue) {
    316291        /* paranoid */ verify( lanes.count > 0 );
     292        /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
     293
    317294        unsigned count = __atomic_load_n( &lanes.count, __ATOMIC_RELAXED );
    318         int preferred;
    319         #if defined(BIAS)
    320                 /* paranoid */ verify(kernelTLS().this_processor->cltr_id < lanes.count );
    321                 preferred = kernelTLS().this_processor->cltr_id;
    322         #endif
     295        int preferred = kernelTLS().this_processor->cltr_id;
    323296
    324297
     
    326299        for(25) {
    327300                // Pick two lists at random
    328                 // unsigned ri = __tls_rand();
    329                 // unsigned rj = __tls_rand();
    330301                unsigned ri = __tls_rand_bck();
    331302                unsigned rj = __tls_rand_bck();
     
    348319                struct $thread * thrd = try_pop(cltr, i, j);
    349320                if(thrd) {
    350                         #if defined(BIAS) && !defined(__CFA_NO_STATISTICS__)
     321                        #if !defined(__CFA_NO_STATISTICS__)
    351322                                if( locali || localj ) __tls_stats()->ready.pick.pop.lsuccess++;
    352323                        #endif
     
    359330}
    360331
     332static void fix_times( struct cluster * cltr ) with( cltr->ready_queue ) {
     333        lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
     334        for(i; lanes.count) {
     335                lanes.tscs[i].tv = ts(lanes.data[i]);
     336        }
     337
     338}
     339
     340//=======================================================================
     341// Various Ready Queue utilities
     342//=======================================================================
     343// these function work the same or almost the same
     344// whether they are using work-stealing or relaxed fifo scheduling
     345
     346//-----------------------------------------------------------------------
     347// get index from random number with or without bias towards queues
     348static inline [unsigned, bool] idx_from_r(unsigned r, unsigned preferred) {
     349        unsigned i;
     350        bool local;
     351        unsigned rlow  = r % BIAS;
     352        unsigned rhigh = r / BIAS;
     353        if((0 != rlow) && preferred >= 0) {
     354                // (BIAS - 1) out of BIAS chances
     355                // Use perferred queues
     356                i = preferred + (rhigh % READYQ_SHARD_FACTOR);
     357                local = true;
     358        }
     359        else {
     360                // 1 out of BIAS chances
     361                // Use all queues
     362                i = rhigh;
     363                local = false;
     364        }
     365        return [i, local];
     366}
     367
     368//-----------------------------------------------------------------------
     369// try to pop from a lane given by index w
     370static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) {
     371        // Get relevant elements locally
     372        __intrusive_lane_t & lane = lanes.data[w];
     373
     374        // If list looks empty retry
     375        if( is_empty(lane) ) return 0p;
     376
     377        // If we can't get the lock retry
     378        if( !__atomic_try_acquire(&lane.lock) ) return 0p;
     379
     380        // If list is empty, unlock and retry
     381        if( is_empty(lane) ) {
     382                __atomic_unlock(&lane.lock);
     383                return 0p;
     384        }
     385
     386        // Actually pop the list
     387        struct $thread * thrd;
     388        thrd = pop(lane);
     389
     390        /* paranoid */ verify(thrd);
     391        /* paranoid */ verify(lane.lock);
     392
     393        // Unlock and return
     394        __atomic_unlock(&lane.lock);
     395
     396        // Update statistics
     397        #if !defined(__CFA_NO_STATISTICS__)
     398                __tls_stats()->ready.pick.pop.success++;
     399        #endif
     400
     401        #if defined(USE_WORKSTEALING)
     402                lanes.times[i].val = thrd->links.ts;
     403        #endif
     404
     405        // return the popped thread
     406        return thrd;
     407}
     408
     409//-----------------------------------------------------------------------
     410// try to pop from any lanes making sure you don't miss any threads push
     411// before the start of the function
    361412__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) {
    362413        /* paranoid */ verify( lanes.count > 0 );
     
    375426}
    376427
    377 
    378428//-----------------------------------------------------------------------
    379 // Given 2 indexes, pick the list with the oldest push an try to pop from it
    380 static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) {
    381         #if !defined(__CFA_NO_STATISTICS__)
    382                 __tls_stats()->ready.pick.pop.attempt++;
    383         #endif
    384 
    385         // Pick the bet list
    386         int w = i;
    387         if( __builtin_expect(!is_empty(lanes.data[j]), true) ) {
    388                 w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? i : j;
    389         }
    390 
    391         return try_pop(cltr, w);
    392 }
    393 
    394 static inline struct $thread * try_pop(struct cluster * cltr, unsigned w) with (cltr->ready_queue) {
    395         // Get relevant elements locally
    396         __intrusive_lane_t & lane = lanes.data[w];
    397 
    398         // If list looks empty retry
    399         if( is_empty(lane) ) return 0p;
    400 
    401         // If we can't get the lock retry
    402         if( !__atomic_try_acquire(&lane.lock) ) return 0p;
    403 
    404 
    405         // If list is empty, unlock and retry
    406         if( is_empty(lane) ) {
    407                 __atomic_unlock(&lane.lock);
    408                 return 0p;
    409         }
    410 
    411         // Actually pop the list
    412         struct $thread * thrd;
    413         thrd = pop(lane);
    414 
    415         /* paranoid */ verify(thrd);
    416         /* paranoid */ verify(lane.lock);
    417 
    418         // Unlock and return
    419         __atomic_unlock(&lane.lock);
    420 
    421         // Update statistics
    422         #if !defined(__CFA_NO_STATISTICS__)
    423                 __tls_stats()->ready.pick.pop.success++;
    424         #endif
    425 
    426         // Update the thread bias
    427         thrd->link.preferred = w / 4;
    428 
    429         // return the popped thread
    430         return thrd;
    431 }
    432 //-----------------------------------------------------------------------
    433 
    434 bool remove_head(struct cluster * cltr, struct $thread * thrd) with (cltr->ready_queue) {
    435         for(i; lanes.count) {
    436                 __intrusive_lane_t & lane = lanes.data[i];
    437 
    438                 bool removed = false;
    439 
    440                 __atomic_acquire(&lane.lock);
    441                         if(head(lane)->link.next == thrd) {
    442                                 $thread * pthrd;
    443                                 pthrd = pop(lane);
    444 
    445                                 /* paranoid */ verify( pthrd == thrd );
    446 
    447                                 removed = true;
    448                         }
    449                 __atomic_unlock(&lane.lock);
    450 
    451                 if( removed ) return true;
    452         }
    453         return false;
    454 }
    455 
    456 //-----------------------------------------------------------------------
    457 
     429// Check that all the intrusive queues in the data structure are still consistent
    458430static void check( __ready_queue_t & q ) with (q) {
    459431        #if defined(__CFA_WITH_VERIFY__) && !defined(USE_MPSC)
     
    480452}
    481453
     454//-----------------------------------------------------------------------
     455// Given 2 indexes, pick the list with the oldest push an try to pop from it
     456static inline struct $thread * try_pop(struct cluster * cltr, unsigned i, unsigned j) with (cltr->ready_queue) {
     457        #if !defined(__CFA_NO_STATISTICS__)
     458                __tls_stats()->ready.pick.pop.attempt++;
     459        #endif
     460
     461        // Pick the bet list
     462        int w = i;
     463        if( __builtin_expect(!is_empty(lanes.data[j]), true) ) {
     464                w = (ts(lanes.data[i]) < ts(lanes.data[j])) ? i : j;
     465        }
     466
     467        return try_pop(cltr, w);
     468}
     469
    482470// Call this function of the intrusive list was moved using memcpy
    483471// fixes the list so that the pointers back to anchors aren't left dangling
     
    499487}
    500488
    501 static void assign_list(unsigned & value, const int inc, dlist(processor, processor) & list, unsigned count) {
     489static void assign_list(unsigned & value, dlist(processor, processor) & list, unsigned count) {
    502490        processor * it = &list`first;
    503491        for(unsigned i = 0; i < count; i++) {
    504492                /* paranoid */ verifyf( it, "Unexpected null iterator, at index %u of %u\n", i, count);
    505493                it->cltr_id = value;
    506                 value += inc;
     494                value += READYQ_SHARD_FACTOR;
    507495                it = &(*it)`next;
    508496        }
    509497}
    510498
    511 static void reassign_cltr_id(struct cluster * cltr, const int inc) {
     499static void reassign_cltr_id(struct cluster * cltr) {
    512500        unsigned preferred = 0;
    513         assign_list(preferred, inc, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
    514         assign_list(preferred, inc, cltr->procs.idles  , cltr->procs.idle );
     501        assign_list(preferred, cltr->procs.actives, cltr->procs.total - cltr->procs.idle);
     502        assign_list(preferred, cltr->procs.idles  , cltr->procs.idle );
    515503}
    516504
     
    531519                // Make sure we always have atleast 1 list
    532520                if(target >= 2) {
    533                         ncount = target * 4;
     521                        ncount = target * READYQ_SHARD_FACTOR;
    534522                } else {
    535523                        ncount = 1;
     
    553541        }
    554542
    555         reassign_cltr_id(cltr, 4);
     543        fix_times(cltr);
     544
     545        reassign_cltr_id(cltr);
    556546
    557547        // Make sure that everything is consistent
     
    579569                // Find new count
    580570                // Make sure we always have atleast 1 list
    581                 lanes.count = target >= 2 ? target * 4: 1;
     571                lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: 1;
    582572                /* paranoid */ verify( ocount >= lanes.count );
    583                 /* paranoid */ verify( lanes.count == target * 4 || target < 2 );
     573                /* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
    584574
    585575                // for printing count the number of displaced threads
     
    626616        }
    627617
    628         reassign_cltr_id(cltr, 4);
     618        fix_times(cltr);
     619
     620        reassign_cltr_id(cltr);
    629621
    630622        // Make sure that everything is consistent
  • libcfa/src/concurrency/ready_subqueue.hfa

    r57b3675 r9cc3a18  
    246246        #endif
    247247}
     248
     249// Aligned timestamps which are used by the relaxed ready queue
     250struct __attribute__((aligned(128))) __timestamp_t {
     251        volatile unsigned long long tv;
     252};
     253
     254void  ?{}(__timestamp_t & this) { this.tv = 0; }
     255void ^?{}(__timestamp_t & this) {}
  • libcfa/src/concurrency/thread.cfa

    r57b3675 r9cc3a18  
    3939        link.next = 0p;
    4040        link.prev = 0p;
    41         link.preferred = -1;
    4241        #if defined( __CFA_WITH_VERIFY__ )
    4342                canary = 0x0D15EA5E0D15EA5Ep;
Note: See TracChangeset for help on using the changeset viewer.