Changeset 9d3dc40 for libcfa/src/collections/list2.hfa
- Timestamp:
- Jan 19, 2026, 11:38:54 AM (2 weeks ago)
- Branches:
- master
- Children:
- 79a8c2a
- Parents:
- 5a95560
- File:
-
- 1 edited
-
libcfa/src/collections/list2.hfa (modified) (17 diffs)
Legend:
- Unmodified
- Added
- Removed
-
libcfa/src/collections/list2.hfa
r5a95560 r9d3dc40 108 108 #define ORIGIN_TAG_NEQ(v1, v2) 0 109 109 110 #define TAGSONLY(...) 111 #define NOTAGS(...) __VA_ARGS__ 112 110 113 #else // Normal 111 114 … … 146 149 ) 147 150 151 #define TAGSONLY(...) __VA_ARGS__ 152 #define NOTAGS(...) 153 148 154 #endif 149 155 … … 270 276 } 271 277 278 // Compile-time memory (cmem) barrier 279 // Prevents the optimizer from reordering instructions across it 280 // Originally included for correctness, though a broken state is not known to be reproducible. 281 // Found to have a critical impact on performance: 282 // - in the positions given by default: generally optimal 283 // - absent: sometimes much slower, depending on the test harness 284 // - in positions (that my be influenced by a principle but) that are arbitrary wrt microarchitecture: typically, much slower 285 #ifdef __EXPERIMENTAL_DISABLE_CMEM_BARRIER__ 286 // upon request, disable cmem barriers 287 #define MAYBE_CMEM_BARRIER 288 #else 289 // by default, enable cmem barriers 290 #define MAYBE_CMEM_BARRIER asm( "" : : : "memory" ) 291 #endif 292 293 // Insert read (location) 294 // One of the read operations that occurs during an insert operation was found to be performace-critical under certain harnesses. 295 // Arguably, the position should not matter if cmem barriers are off. Treating the factors as independent allows for measuring this idea. 296 #ifdef __EXPERIMENTAL_DELAY_INSERT_READ__ 297 // upon request: do the read late (between the cmem barriers); this location is where the read was originally found when this insert read first became a performance-perterbing hypothesis 298 #define MAYBE_INSERT_READ_EARLY(...) 299 #define MAYBE_INSERT_READ_LATE(...) __VA_ARGS__ 300 #else 301 // by default: do the read early (before the first cmem barrier); better performance has been seen here 302 #define MAYBE_INSERT_READ_EARLY(...) __VA_ARGS__ 303 #define MAYBE_INSERT_READ_LATE(...) 304 #endif 305 272 306 forall( tE &, tLinks & | embedded( tE, tLinks, dlink(tE) ) ) { 273 307 … … 283 317 dlink(tE) & linkToInsert = to_insert`inner; 284 318 NOLOOSE( 319 TAGSONLY( 285 320 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev)); 286 321 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next)); 322 ) 287 323 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert); 288 324 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert); 289 325 ) 290 326 dlink(tE) & list_pos_links = list_pos_real`inner; 291 asm( "" : : : "memory" ); 327 MAYBE_INSERT_READ_EARLY( 328 dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next ); 329 ) 330 MAYBE_CMEM_BARRIER; 292 331 size_t list_pos_links_num = (size_t)(& list_pos_links); 293 332 size_t to_insert_prev_num = ORIGIN_TAG_ASGN(list_pos_links_num, list_pos_tag); … … 295 334 linkToInsert.prev = to_insert_prev; 296 335 linkToInsert.next = list_pos_links.next; 336 MAYBE_INSERT_READ_LATE( 297 337 dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next ); 338 ) 298 339 size_t afterLinks_prev_tag = ORIGIN_TAG_QUERY((size_t)afterLinks.prev); 299 340 size_t linkToInsert_num = (size_t)(& linkToInsert); … … 301 342 afterLinks.prev = (dlink(tE)*)(afterLinks_prev_num); 302 343 list_pos_links.next = &linkToInsert; 303 asm( "" : : : "memory" );344 MAYBE_CMEM_BARRIER; 304 345 } 305 346 … … 315 356 dlink(tE) & linkToInsert = to_insert`inner; 316 357 NOLOOSE( 358 TAGSONLY( 317 359 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev)); 318 360 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next)); 361 ) 319 362 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert); 320 363 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert); 321 364 ) 322 365 dlink(tE) & list_pos_links = list_pos_real`inner; 323 asm( "" : : : "memory" ); 366 MAYBE_INSERT_READ_EARLY( 367 dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev ); 368 ) 369 MAYBE_CMEM_BARRIER; 324 370 size_t list_pos_links_num = (size_t)(& list_pos_links); 325 371 size_t to_insert_next_num = ORIGIN_TAG_ASGN(list_pos_links_num, list_pos_tag); … … 327 373 linkToInsert.next = to_insert_next; 328 374 linkToInsert.prev = list_pos_links.prev; 375 MAYBE_INSERT_READ_LATE( 329 376 dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev ); 377 ) 330 378 size_t beforeLinks_next_tag = ORIGIN_TAG_QUERY((size_t)beforeLinks.next); 331 379 size_t linkToInsert_num = (size_t)(& linkToInsert); … … 333 381 beforeLinks.next = (dlink(tE)*)(beforeLinks_next_num); 334 382 list_pos_links.prev = &linkToInsert; 335 asm( "" : : : "memory" );383 MAYBE_CMEM_BARRIER; 336 384 } 337 385 … … 355 403 356 404 NOLOOSE( 357 asm( "" : : : "memory" );405 MAYBE_CMEM_BARRIER; 358 406 size_t list_pos_links_num = (size_t) &list_pos_links; 359 407 size_t list_pos_links_tagged_num = ORIGIN_TAG_ENABL( list_pos_links_num ); 360 408 list_pos_links.next = list_pos_links.prev = (dlink(tE)*) list_pos_links_tagged_num; 361 asm( "" : : : "memory" );409 MAYBE_CMEM_BARRIER; 362 410 ) 363 411 return list_pos; … … 482 530 static inline void insert_first( dlist(tE, tLinks) &lst, tE & e ) { 483 531 dlink(tE) & linkToInsert = e`inner; 532 NOLOOSE( 533 TAGSONLY( 484 534 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev)); 485 535 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next)); 536 ) 486 537 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert); 487 538 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert); 539 ) 488 540 dlink(tE) & list_pos_links = lst; 489 asm( "" : : : "memory" ); 541 MAYBE_INSERT_READ_EARLY( 542 dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next ); 543 ) 544 MAYBE_CMEM_BARRIER; 490 545 size_t list_pos_links_num = (size_t)(& list_pos_links); 491 546 size_t to_insert_prev_num = ORIGIN_TAG_ENABL(list_pos_links_num); … … 493 548 linkToInsert.prev = to_insert_prev; 494 549 linkToInsert.next = list_pos_links.next; 550 MAYBE_INSERT_READ_LATE( 495 551 dlink(tE) & afterLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.next ); 552 ) 496 553 size_t linkToInsert_num = (size_t)(& linkToInsert); 497 554 size_t afterLinks_prev_num = linkToInsert_num; 498 555 afterLinks.prev = (dlink(tE)*)(afterLinks_prev_num); 499 556 list_pos_links.next = &linkToInsert; 500 asm( "" : : : "memory" );557 MAYBE_CMEM_BARRIER; 501 558 } 502 559 503 560 static inline void insert_last( dlist(tE, tLinks) &lst, tE & e ) { 504 // insert_before(iter(lst), e);505 561 dlink(tE) & linkToInsert = e`inner; 562 NOLOOSE( 563 TAGSONLY( 506 564 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.next)); 507 565 verify(ORIGIN_TAG_QUERY((size_t)linkToInsert.prev)); 566 ) 508 567 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.next) == (size_t)&linkToInsert); 509 568 verify(ORIGIN_TAG_CLEAR((size_t)linkToInsert.prev) == (size_t)&linkToInsert); 569 ) 510 570 dlink(tE) & list_pos_links = lst; 511 asm( "" : : : "memory" ); 571 MAYBE_INSERT_READ_EARLY( 572 dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev ); 573 ) 574 MAYBE_CMEM_BARRIER; 512 575 size_t list_pos_links_num = (size_t)(& list_pos_links); 513 576 size_t to_insert_next_num = ORIGIN_TAG_ENABL(list_pos_links_num); … … 515 578 linkToInsert.next = to_insert_next; 516 579 linkToInsert.prev = list_pos_links.prev; 580 MAYBE_INSERT_READ_LATE( 517 581 dlink(tE) & beforeLinks = * (dlink(tE) *) ORIGIN_TAG_CLEAR( (size_t) list_pos_links.prev ); 582 ) 518 583 size_t linkToInsert_num = (size_t)(& linkToInsert); 519 584 size_t beforeLinks_next_num = linkToInsert_num; 520 585 beforeLinks.next = (dlink(tE)*)(beforeLinks_next_num); 521 586 list_pos_links.prev = &linkToInsert; 522 asm( "" : : : "memory" );587 MAYBE_CMEM_BARRIER; 523 588 } 524 589 … … 526 591 verify (&lst != 0p); 527 592 dlink(tE) & list_links = lst; 528 verify (! ORIGIN_TAG_QUERY( (size_t) (& list_links) ) );529 593 // call is valid on empty list; when so, list_links.next and after_links.prev have otags set 530 594 … … 541 605 after_links.prev = (dlink(tE) *) after_links_prev_rslt; 542 606 543 asm( "" : : : "memory" );607 MAYBE_CMEM_BARRIER; 544 608 size_t list_pos_links_num = (size_t) &fst_links; 545 609 size_t list_pos_links_tagged_num = ORIGIN_TAG_ENABL( list_pos_links_num ); 546 610 fst_links.next = fst_links.prev = (dlink(tE)*) list_pos_links_tagged_num; 547 asm( "" : : : "memory" );611 MAYBE_CMEM_BARRIER; 548 612 549 613 tytagref( tLinks, dlink(tE) ) retExt = { fst_links }; … … 568 632 before_links.next = (dlink(tE) *) before_links_next_rslt; 569 633 570 asm( "" : : : "memory" );634 MAYBE_CMEM_BARRIER; 571 635 size_t list_pos_links_num = (size_t) &last_links; 572 636 size_t list_pos_links_tagged_num = ORIGIN_TAG_ENABL( list_pos_links_num ); 573 637 last_links.prev = last_links.next = (dlink(tE)*) list_pos_links_tagged_num; 574 asm( "" : : : "memory" );638 MAYBE_CMEM_BARRIER; 575 639 576 640 tytagref( tLinks, dlink(tE) ) lpLnkTagged = { last_links }; … … 630 694 631 695 } 632
Note:
See TracChangeset
for help on using the changeset viewer.