Ignore:
Timestamp:
Jan 19, 2026, 11:38:54 AM (2 weeks ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
master
Children:
79a8c2a
Parents:
5a95560
Message:

Various changes motivated by improving CFA score on len-1 queues.

No such CFA score improvement achieved. Each change helped only on stripped-down, "try to isolate an important factor" tests. Generally, the changes are benign refactorings. (Results substantiating "don't hurt" are forthcoming.)

Libcfa changes are

  • move a read action from between the memory breaks to before them
  • make the memory breaks conditionally excluded (default included, as before)

Harness changes are

  • add width, a compiled-in number of lists to use in round-robin order; defaults to 1, which is what was happening all along
  • make the analysis scripts tolerate (so far, ignore) the width column
  • rename CLI arg NumNodes to Length (now NumNodes is Length * Width)
  • factor core testing loops into helper function runtest
  • switch to signal-based termination (and add uC++ work-around)
  • put "iterator threading" into ITERS_SAVE, joining preexisting "save iter into elem's self ref"; make iterator threading conditional on iterators-active
  • disable insertion loop counter and obs_*-variable declarations (and thus writes) when observation disabled
  • generalize observation to work on multiple lists
  • make observation and assertion-check-enabled mode work on stripped CFA list implementations like tagging-disabled
  • through this observation, ensure correctness of multi-list versions
File:
1 edited

Legend:

Unmodified
Added
Removed
  • libcfa/src/collections/list2.hfa

    r5a95560 r9d3dc40  
    108108    #define ORIGIN_TAG_NEQ(v1, v2) 0
    109109
     110    #define TAGSONLY(...)
     111    #define NOTAGS(...) __VA_ARGS__
     112
    110113#else // Normal
    111114
     
    146149    )
    147150
     151    #define TAGSONLY(...) __VA_ARGS__
     152    #define NOTAGS(...)
     153
    148154#endif
    149155
     
    270276}
    271277
     278// Compile-time memory (cmem) barrier
     279// Prevents the optimizer from reordering instructions across it
     280// Originally included for correctness, though a broken state is not known to be reproducible.
     281// Found to have a critical impact on performance:
     282// - in the positions given by default: generally optimal
     283// - absent: sometimes much slower, depending on the test harness
     284// - in positions (that my be influenced by a principle but) that are arbitrary wrt microarchitecture: typically, much slower
     285#ifdef __EXPERIMENTAL_DISABLE_CMEM_BARRIER__
     286// upon request, disable cmem barriers
     287#define MAYBE_CMEM_BARRIER
     288#else
     289// by default, enable cmem barriers
     290#define MAYBE_CMEM_BARRIER asm( "" : : : "memory" )
     291#endif
     292
     293// Insert read (location)
     294// One of the read operations that occurs during an insert operation was found to be performace-critical under certain harnesses.
     295// Arguably, the position should not matter if cmem barriers are off.  Treating the factors as independent allows for measuring this idea.
     296#ifdef __EXPERIMENTAL_DELAY_INSERT_READ__
     297// upon request: do the read late (between the cmem barriers); this location is where the read was originally found when this insert read first became a performance-perterbing hypothesis
     298#define MAYBE_INSERT_READ_EARLY(...)
     299#define MAYBE_INSERT_READ_LATE(...) __VA_ARGS__
     300#else
     301// by default: do the read early (before the first cmem barrier); better performance has been seen here
     302#define MAYBE_INSERT_READ_EARLY(...) __VA_ARGS__
     303#define MAYBE_INSERT_READ_LATE(...)
     304#endif
     305
    272306forall( tE &, tLinks & | embedded( tE, tLinks, dlink(tE) ) ) {
    273307
     
    283317        dlink(tE) & linkToInsert = to_insert`inner;
    284318      NOLOOSE(
     319       TAGSONLY(
    285320                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev));
    286321                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next));
     322       )
    287323                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert);
    288324                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert);
    289325      )
    290326        dlink(tE) & list_pos_links = list_pos_real`inner;
    291         asm( "" : : : "memory" );
     327      MAYBE_INSERT_READ_EARLY(
     328        dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next );
     329      )
     330        MAYBE_CMEM_BARRIER;
    292331        size_t list_pos_links_num = (size_t)(& list_pos_links);
    293332        size_t to_insert_prev_num = ORIGIN_TAG_ASGN(list_pos_links_num, list_pos_tag);
     
    295334                linkToInsert.prev = to_insert_prev;
    296335                linkToInsert.next = list_pos_links.next;
     336      MAYBE_INSERT_READ_LATE(
    297337        dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next );
     338      )
    298339        size_t afterLinks_prev_tag = ORIGIN_TAG_QUERY((size_t)afterLinks.prev);
    299340        size_t linkToInsert_num = (size_t)(& linkToInsert);
     
    301342        afterLinks.prev = (dlink(tE)*)(afterLinks_prev_num);
    302343                list_pos_links.next = &linkToInsert;
    303         asm( "" : : : "memory" );
     344        MAYBE_CMEM_BARRIER;
    304345        }
    305346
     
    315356        dlink(tE) & linkToInsert = to_insert`inner;
    316357      NOLOOSE(
     358       TAGSONLY(
    317359                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev));
    318360                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next));
     361       )
    319362                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert);
    320363                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert);
    321364      )
    322365        dlink(tE) & list_pos_links = list_pos_real`inner;
    323         asm( "" : : : "memory" );
     366      MAYBE_INSERT_READ_EARLY(
     367        dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev );
     368      )
     369        MAYBE_CMEM_BARRIER;
    324370        size_t list_pos_links_num = (size_t)(& list_pos_links);
    325371        size_t to_insert_next_num = ORIGIN_TAG_ASGN(list_pos_links_num, list_pos_tag);
     
    327373                linkToInsert.next = to_insert_next;
    328374                linkToInsert.prev = list_pos_links.prev;
     375      MAYBE_INSERT_READ_LATE(
    329376        dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev );
     377      )
    330378        size_t beforeLinks_next_tag = ORIGIN_TAG_QUERY((size_t)beforeLinks.next);
    331379        size_t linkToInsert_num = (size_t)(& linkToInsert);
     
    333381        beforeLinks.next = (dlink(tE)*)(beforeLinks_next_num);
    334382                list_pos_links.prev = &linkToInsert;
    335         asm( "" : : : "memory" );
     383        MAYBE_CMEM_BARRIER;
    336384        }
    337385
     
    355403
    356404      NOLOOSE(
    357         asm( "" : : : "memory" );
     405        MAYBE_CMEM_BARRIER;
    358406        size_t list_pos_links_num = (size_t) &list_pos_links;
    359407        size_t list_pos_links_tagged_num = ORIGIN_TAG_ENABL( list_pos_links_num );
    360408                list_pos_links.next = list_pos_links.prev = (dlink(tE)*) list_pos_links_tagged_num;
    361         asm( "" : : : "memory" );
     409        MAYBE_CMEM_BARRIER;
    362410      )
    363411        return list_pos;
     
    482530    static inline void insert_first( dlist(tE, tLinks) &lst, tE & e ) {
    483531        dlink(tE) & linkToInsert = e`inner;
     532      NOLOOSE(
     533       TAGSONLY(
    484534                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev));
    485535                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next));
     536       )
    486537                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert);
    487538                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert);
     539      )
    488540        dlink(tE) & list_pos_links = lst;
    489         asm( "" : : : "memory" );
     541      MAYBE_INSERT_READ_EARLY(
     542        dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next );
     543      )
     544        MAYBE_CMEM_BARRIER;
    490545        size_t list_pos_links_num = (size_t)(& list_pos_links);
    491546        size_t to_insert_prev_num = ORIGIN_TAG_ENABL(list_pos_links_num);
     
    493548                linkToInsert.prev = to_insert_prev;
    494549                linkToInsert.next = list_pos_links.next;
     550      MAYBE_INSERT_READ_LATE(
    495551        dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next );
     552      )
    496553        size_t linkToInsert_num = (size_t)(& linkToInsert);
    497554        size_t afterLinks_prev_num = linkToInsert_num;
    498555        afterLinks.prev = (dlink(tE)*)(afterLinks_prev_num);
    499556                list_pos_links.next = &linkToInsert;
    500         asm( "" : : : "memory" );
     557        MAYBE_CMEM_BARRIER;
    501558    }
    502559
    503560    static inline void insert_last( dlist(tE, tLinks) &lst, tE & e ) {
    504         // insert_before(iter(lst), e);
    505561        dlink(tE) & linkToInsert = e`inner;
     562      NOLOOSE(
     563       TAGSONLY(
    506564                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next));
    507565                verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev));
     566       )
    508567                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert);
    509568                verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert);
     569      )
    510570        dlink(tE) & list_pos_links = lst;
    511         asm( "" : : : "memory" );
     571      MAYBE_INSERT_READ_EARLY(
     572        dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev );
     573      )
     574        MAYBE_CMEM_BARRIER;
    512575        size_t list_pos_links_num = (size_t)(& list_pos_links);
    513576        size_t to_insert_next_num = ORIGIN_TAG_ENABL(list_pos_links_num);
     
    515578                linkToInsert.next = to_insert_next;
    516579                linkToInsert.prev = list_pos_links.prev;
     580      MAYBE_INSERT_READ_LATE(
    517581        dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev );
     582      )
    518583        size_t linkToInsert_num = (size_t)(& linkToInsert);
    519584        size_t beforeLinks_next_num = linkToInsert_num;
    520585        beforeLinks.next = (dlink(tE)*)(beforeLinks_next_num);
    521586                list_pos_links.prev = &linkToInsert;
    522         asm( "" : : : "memory" );
     587        MAYBE_CMEM_BARRIER;
    523588    }
    524589
     
    526591                verify (&lst != 0p);
    527592        dlink(tE) & list_links = lst;
    528         verify (! ORIGIN_TAG_QUERY( (size_t) (& list_links) ) );
    529593        // call is valid on empty list; when so, list_links.next and after_links.prev have otags set
    530594
     
    541605        after_links.prev = (dlink(tE) *) after_links_prev_rslt;
    542606
    543         asm( "" : : : "memory" );
     607        MAYBE_CMEM_BARRIER;
    544608        size_t list_pos_links_num = (size_t) &fst_links;
    545609        size_t list_pos_links_tagged_num = ORIGIN_TAG_ENABL( list_pos_links_num );
    546610                fst_links.next = fst_links.prev = (dlink(tE)*) list_pos_links_tagged_num;
    547         asm( "" : : : "memory" );
     611        MAYBE_CMEM_BARRIER;
    548612
    549613        tytagref( tLinks, dlink(tE) ) retExt = { fst_links };
     
    568632        before_links.next = (dlink(tE) *) before_links_next_rslt;
    569633
    570         asm( "" : : : "memory" );
     634        MAYBE_CMEM_BARRIER;
    571635        size_t list_pos_links_num = (size_t) &last_links;
    572636        size_t list_pos_links_tagged_num = ORIGIN_TAG_ENABL( list_pos_links_num );
    573637                last_links.prev = last_links.next = (dlink(tE)*) list_pos_links_tagged_num;
    574         asm( "" : : : "memory" );
     638        MAYBE_CMEM_BARRIER;
    575639
    576640        tytagref( tLinks, dlink(tE) ) lpLnkTagged = { last_links };
     
    630694
    631695}
    632 
Note: See TracChangeset for help on using the changeset viewer.