#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>

#ifdef DISABLE_OBSERVATION
#include "proglang.h"
#define bobs_init(...)
#else
#include "observation.h"
#endif

#ifdef HUGE_USER_ITEMS
  #define UDATA_T float
  #define UDATA_LEN 17
#endif

// canonical insertion order means the order of traversing B_UserItem.{succ|pred}_pos references
// outer v middle action: as at (just after) inserting or (just before) removing the element in question...
// - outer-action node: one that is at the front or back of the list
// - middle-action node: otherwise
// outer action is
// - typical/default: middle action happens only when InterleaveFrac > 0, and only on this fraction
// - logically first: a prefix of items, in canonical insertion order, gets outer action; a suffix gets middle action
// regardless of interleaving, canonical insertion order is the same as linked-list element order in the final built-up state, up to polarity
// nontrivial interleaving makes the time oder of insertion/removal _operation_invocations_  different from canonical
typedef enum InterleaveAction { OuterActn = 0, InnerActn = 1 } InterleaveAction;
enum { N_INTERLEAVE_ACTION = 2 } ;

typedef
struct __attribute__(( aligned(64) )) B_UserItem 
    BFX_EXTRUSION_DECL(B_UserItem)
{
    BFX_INTRUSION(B_UserItem)
  #ifndef DISABLE_SHUFFLING_INDIRECTION
    size_t succ_pos;
    size_t pred_pos;
  #endif
    size_t self_ord;
  #ifndef DISABLE_INTERLEAVING
    InterleaveAction later_flip;
  #endif
  #ifndef DISABLE_ITERS
    BFX_LISTED_ELEM_T(struct B_UserItem) selfListed;
  #endif
  #ifdef HUGE_USER_ITEMS
    UDATA_T udata[UDATA_T] __attribute__((unused));
  #endif
}
B_UserItem;

BFX_EXTRUSION_FOLLOWUP(B_UserItem)

#if defined(NDEBUG) || (defined(__cforall) && !defined(__CFA_DEBUG__))
    enum { DefaultNumNodes = 1000, DefaultExperimentDurSec = 1, DefaultCheckDonePeriod = 1000, DefaultExperimentDurOpCount = -1, DefaultSeed = 5 };
    const double DefaultInterleaveFrac = 0.0;
    #define TRACE(tp)
#else 
    enum { DefaultNumNodes = 10, DefaultExperimentDurSec = 1, DefaultCheckDonePeriod = 2, DefaultExperimentDurOpCount = 20, DefaultSeed = 5 };
    const double DefaultInterleaveFrac = 0.5;
    static const char * tp_filter
    // = "";
    // = "+ea";
        = "*";
    #define TRACE(tp) \
        if (strcmp("*", tp_filter) == 0 || strchr(tp_filter, tp)) { \
            printf("%c", tp); \
            bobs_report(); \
        }
#endif

static B_UserItem *ui = NULL;

static BFX_LISTED_ELEM_T(B_UserItem) observedItem;

static BFX_LIST_HEAD_T(B_UserItem) lst;

B_UserItem * seekUi( size_t i ) {
  #ifdef DISABLE_SHUFFLING_INDIRECTION
    return & ui[i];
  #else
    B_UserItem * cur = & ui[0];
    for (; i > 0; i--) cur = & ui[ cur->succ_pos ];
    return cur;
  #endif
}

#ifdef DISABLE_OBSERVATION
MAYBE_EXTERN_C (
    void bobs_seek(size_t i) {}
    void bobs_moveNext() {}
    void bobs_movePrev() {}
    bool bobs_hasCurrent() { return 0; }
    void * bobs_getCurrentLoc() { return NULL; }
    size_t bobs_getCurrentVal() { return 0; }
    // enum bobs_op_movement_t bobs_op_movement = OP_MOVEMENT;
    // enum bobs_op_polarity_t bobs_op_polarity = OP_POLARITY;
)

#else

MAYBE_EXTERN_C (

    volatile size_t       bobs_ops_completed      = 0;
    volatile size_t bobs_prog_inserting     = 0;
    volatile size_t bobs_prog_removing      = 0;
    volatile size_t bobs_prog_removing_end  = 0;
    volatile size_t bobs_prog_rollover_flag = 0;
    //                    bobs_prog_rem_pos         (defined after BOP_REMPROGEND_IS_REMNO_BASED)

    void bobs_seek(size_t i) {
      #ifndef DISABLE_ITERS
        observedItem = seekUi(i)->selfListed;
      #endif
    }

    void bobs_moveNext() {
        observedItem = BFX_GET_AFTER(B_UserItem, lst, observedItem);
    }

    void bobs_movePrev() {
        observedItem = BFX_GET_BEFORE(B_UserItem, lst, observedItem);
    }

    bool bobs_hasCurrent() {
        return BFX_IS_VALID_POS(B_UserItem, lst, observedItem);
    }

    void * bobs_getCurrentLoc() {
        return BFX_DEREF_POS(B_UserItem, lst, observedItem);
    }
    size_t bobs_getCurrentVal() {
        B_UserItem * curUI = BFX_DEREF_POS(B_UserItem, lst, observedItem);
        return curUI->self_ord;
    }

    enum bobs_op_movement_t bobs_op_movement = OP_MOVEMENT;
    enum bobs_op_polarity_t bobs_op_polarity = OP_POLARITY;
)
#endif


#ifndef DISABLE_OBSERVATION

// Remove progress end (as in, outer) (number) is based (upon) remove-number
// True when an OP's REMELEM used remNo to choose which element to remove
// False otherwise; notably including when REMELEM just bases upon first/last
// Default to false.
#ifndef BOP_REMPROGEND_IS_REMNO_BASED
#define BOP_REMPROGEND_IS_REMNO_BASED false
#endif
MAYBE_EXTERN_C (
    volatile size_t const * bobs_prog_rem_pos =
      #ifdef DISABLE_INTERLEAVING
        & bobs_prog_removing
      #else
        BOP_REMPROGEND_IS_REMNO_BASED ? & bobs_prog_removing_end : & bobs_prog_removing
      #endif
      ;
)

#endif // ndef DISABLE_OBSERVATION

// size_t uDefaultPreemption() {
//         return 0;
// }

#ifdef DISABLE_ITERS
// Saves on memory accesses, makes element-oriented removals and observation impossible (instead, they crash)
static inline BFX_LISTED_ELEM_T(B_UserItem) buhrdice_pass( BFX_LISTED_ELEM_T(B_UserItem) v ) {   // prevent eliding, cheaper than volatile
    __asm__ __volatile__ ( "" : "+r"(v) );
    return v ;
} // pass
#define ITERS_SAVE(i, insertElemExpr) buhrdice_pass(insertElemExpr)
#endif

// begin: copied from https://stackoverflow.com/a/33021408
#define IMAX_BITS(m) ((m)/((m)%255+1) / 255%255*8 + 7-86/((m)%255+12))
#define RAND_MAX_WIDTH IMAX_BITS(RAND_MAX)
static_assert((RAND_MAX & (RAND_MAX + 1u)) == 0, "RAND_MAX not a Mersenne number");

uint64_t rand64(void) {
  uint64_t r = 0;
  for (int i = 0; i < 64; i += RAND_MAX_WIDTH) {
    r <<= RAND_MAX_WIDTH;
    r ^= (unsigned) rand();
  }
  return r;
}
// end: copied from https://stackoverflow.com/a/33021408

int main(int argc, const char *argv[]) {

  #ifdef DISABLE_OBSERVATION
    // define the outbound dependencies as locals, for compiling into nops
    size_t       bobs_ops_completed      = 0;
    size_t bobs_prog_inserting     = 0;
    size_t bobs_prog_removing      = 0;
    size_t bobs_prog_removing_end  = 0;
    size_t bobs_prog_rollover_flag = 0;
  #endif

    const char * usage_args = "[ExperimentDurSec [CheckDonePeriod [NumNodes [ExperimentDurOpCount [Seed [InterleaveFrac]]]]]]";
    const int static_arg_posns = 6;

    size_t ExperimentDurSec     = DefaultExperimentDurSec;
    size_t CheckDonePeriod      = DefaultCheckDonePeriod;
    size_t NumNodes             = DefaultNumNodes;
    size_t ExperimentDurOpCount = DefaultExperimentDurOpCount;
    size_t Seed                 = DefaultSeed;
    double InterleaveFrac       = DefaultInterleaveFrac;

    switch (((argc - 1) < static_arg_posns) ? (argc - 1) : static_arg_posns) {
      case 6: InterleaveFrac = atof(argv[6]);
      case 5: Seed = atoi(argv[5]);
      case 4: ExperimentDurOpCount = atol(argv[4]);
      case 3: NumNodes = atoi(argv[3]);
      case 2: CheckDonePeriod = atoi(argv[2]);
      case 1: ExperimentDurSec = atoi(argv[1]);
    }

    // printf("ExperimentDurSec=%d, CheckDonePeriod=%d, NumNodes=%d, ExperimentDurOpCount=%zd, Seed=%d,\n",
    //     ExperimentDurSec, CheckDonePeriod, NumNodes, ExperimentDurOpCount, Seed );

    if (ExperimentDurSec == 0 || CheckDonePeriod == 0 || NumNodes == 0 || ExperimentDurOpCount == 0 || Seed == 0 ) {
        printf("usage: %s %s\n", argv[0], usage_args);
        return -1;
    }

  #ifdef DISABLE_CLOCK_RECHECK
    if (ExperimentDurSec != -1) {
        printf("Error: experiment compiled as fixed-work only.  ExperimentDurSec (currently %d) must be set to -1.\nUsage: %s %s\n", ExperimentDurSec, argv[0], usage_args);
        return -1;
    }
  #endif

    ui = (B_UserItem*) malloc( (size_t)NumNodes * (size_t)sizeof(B_UserItem) );
    if (!ui) {
        printf("malloc request for %zd bytes for ui refused\n", (size_t)NumNodes * (size_t)sizeof(B_UserItem));
        return 1;
    }
    memset(ui, 0, (size_t)NumNodes * (size_t)sizeof(B_UserItem));
    // Construct
    #ifdef __cforall
    for (size_t i = 0; i < NumNodes; i++) {
        B_UserItem * curUI = & ui[i];
        (*curUI){};
    }
    #endif

    // Shuffling makes listed items' physical order in memory different from their order within to the list.
    // Affects insertion: next item to insert picked through insertOrdShuf.
  #ifdef DISABLE_SHUFFLING_INDIRECTION
    for ( size_t insertOrdinal = 0; insertOrdinal < NumNodes; insertOrdinal += 1 ) {
        ui[insertOrdinal].self_ord = insertOrdinal;
    }
  #else
    // To ensure random memory order of nodes being inserted, do so according to a shuffled ordering of them.
    {
        // insertOrdShuf is a temporary list of integers; once the order is generated, we'll write its consequence into the elements
        size_t * insertOrdShuf = (size_t *) malloc( sizeof( size_t ) * NumNodes );
        if (!insertOrdShuf) {
            printf("malloc request for %zd bytes for insertOrdShuf refused\n", (size_t)NumNodes * (size_t)sizeof(size_t));
            return 1;
        }
        // Fill with the ordinals (iota)
        for (size_t i = 0; i < NumNodes; i++) {
            insertOrdShuf[i] = i;
        }
        // Dummy "Seed" of -1 means skip the shuffle: measure overhead of shuffling indirection
        if (Seed != -1) {
            // Shuffle per https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#Sattolo's_algorithm
            // Don't want to draw from all permutations, only single-cycle permutations
            srand(Seed);
            for (size_t i = NumNodes - 1; i > 0; --i) {
                size_t j = rand64() % i; // j in [0, i-1]
                size_t tmp = insertOrdShuf[i];
                insertOrdShuf[i] = insertOrdShuf[j];
                insertOrdShuf[j] = tmp;
            }
        }
        // take inserOrdShuf as successor-index; write consequences of this traversal into the elements
        size_t lastPos = 0;
        size_t curPos = 0;
        for ( size_t insertOrdinal = 0; insertOrdinal < NumNodes; insertOrdinal += 1 ) {
            B_UserItem * curItem = & ui[ curPos ];
            curItem->succ_pos = insertOrdShuf[ curPos ];
            curItem->pred_pos = lastPos;
            curItem->self_ord = insertOrdinal;
            lastPos = curPos;
            curPos = curItem->succ_pos;
        }
        assert( curPos == 0 ); // completed sole cycle
        ui[0].pred_pos = lastPos;
        free(insertOrdShuf);
    }
    #define INSERTPOS(insertNum) insertOrdShuf[insertNum]
  #endif


    /*
    
    
    
    
    
    TO FIX





    clarify self_ord meaning insertion order or physical-adjacency order
    
    
    
    
    
    
    
    
    
    */


    // Interleaving affects the list position where an element-oriented operation occurs: at an outer vs. inner node.
    // Perterbs the sequence of logical insert/remove numbers presented to the OP cartridge, e.g. from [0 1 2 3 4 5 6]
    // to [3 0 4 1 5 2 6], which is [inner outer inner outer inner outer solo], for a perfect-alternation interleave; except that the
    // outer/inner interleave is atually selected randomly.
  #ifndef DISABLE_INTERLEAVING
    // see InterleaveAction comments near top

    size_t numNodesItlv[N_INTERLEAVE_ACTION] = {};  // logically const, complex init

    //
    // begin: specific to a two-fingered interleave
    //
    static_assert( OuterActn == 0 && InnerActn == 1 && N_INTERLEAVE_ACTION == 2 );

    const size_t iactSplitPos = NumNodes - NumNodes * InterleaveFrac;

    // initialize numNodesItlv
    numNodesItlv[InnerActn] = (NumNodes-1) * InterleaveFrac;             // inner, later: usual outer-inner order revd for data dep
    numNodesItlv[OuterActn] = (NumNodes-1) - numNodesItlv[InnerActn];    // outer, earlier: ditto
    assert( (numNodesItlv[InnerActn] + numNodesItlv[OuterActn] == NumNodes - 1) && 
        "The two modes are meant to cover all items but one, which is left for bop_setup/bop_teardown" );

    B_UserItem * const iactFwdStartItem[N_INTERLEAVE_ACTION] = { seekUi(0),           (iactSplitPos == NumNodes) ? NULL : seekUi(iactSplitPos) };
    B_UserItem * const iactRevStartItem[N_INTERLEAVE_ACTION] = { seekUi(NumNodes-1),  (iactSplitPos == NumNodes) ? NULL : seekUi(NumNodes - iactSplitPos -1) };

    {
        // Not all nodes'interleave invalues are random.  Need bookending nodes set to specific values.
        // Last node seen in outer handling
        // - must refer to inner mode
        // - is near the split position (middle of list)
        // - forward outer processing goes first to middle
        // - reverse outer processing goes last to middle
        // Last node seen in inner handling
        // - must refer to outer mode
        // - is at a leaf (first or last of list)
        // - forward inner processing goes middle to last
        // - reverse inner processing goes middle to first
        // One of these cross references gets navigated, when its mode is the one that finishes first.
        // (By then, all references "back" into this mode have been used up, so it's a termination condition for the mode that finishes first.)
        // When the other mode exhausts its stash, the global ternimation condition (handled all non setup/teardown nodes) overrides its cross reference.
        // Call out these four nodes as special (equal number, 2, referring to each mode); fill the rest randomly.

        assert( InterleaveFrac >= 0.0 && InterleaveFrac <= 1.0 );
        // no action required on no-interleave
        if ( InterleaveFrac > 0.0 ) {

            // assert( ( NumNodes >= 2 * N_INTERLEAVE_ACTION)
            //     && "Don't feel like dealing with interleave edge cases on tiny lists");

            if ( NumNodes >= 2 * N_INTERLEAVE_ACTION ) {
                InterleaveAction * interleaveKey = (InterleaveAction *) malloc( sizeof(InterleaveAction) * NumNodes );

                const size_t iactStartPos   [N_INTERLEAVE_ACTION] = { 0,            iactSplitPos         };
                const size_t iactFinshPos   [N_INTERLEAVE_ACTION] = { iactSplitPos, NumNodes             };

                // generate randomly drawn ikey values
                static_assert( 1 && "sorry, not checkable" && "values of InterleaveAction are enumerable" );
                for ( unsigned char kk = 0; kk < (unsigned char) N_INTERLEAVE_ACTION; kk++ ) { // fill to the fractions
                    InterleaveAction k = (InterleaveAction) kk;
                    for ( int i = iactStartPos[k]; i < iactFinshPos[k]; i++ ) {
                        interleaveKey[i] = k;
                    }
                }
                for (size_t i = 0; i < NumNodes; i++) { // shuffle
                    size_t nodesRemaining = NumNodes - i;
                    size_t swapWith = i + (rand64() % nodesRemaining);
                    InterleaveAction tempValue = interleaveKey[swapWith];
                    interleaveKey[swapWith] = interleaveKey[i];
                    interleaveKey[i] = tempValue;
                }
                for (size_t i = 0; i < NumNodes; i++) { // record inside items
                    ui[i].later_flip = interleaveKey[i];
                }
                // set bookened positions
                // FIXME: still incorrect; can't simply overwrite like done here;
                // need to find an oppositely actioned node to swap with;
                // as is,
                //    ./perfexp--upp-upp--queue-inslast-remelem 5 100000 37 -1 3 .50
                // gets one too many outers;
                // this extra outer reenables the outer cursor at ordinal 18 
                // after the inner cursor already remvoed ordinal 18
                assert(false && "need to fix bookends");
                seekUi( iactStartPos[OuterActn]    )->later_flip = OuterActn;
                seekUi( iactStartPos[InnerActn]    )->later_flip = InnerActn;
                seekUi( iactFinshPos[OuterActn] -1 )->later_flip = InnerActn;
                seekUi( iactFinshPos[InnerActn] -1 )->later_flip = OuterActn;
    //
    // end: specific to a two-fingered interleave
    //
                free(interleaveKey);
            } else {
                // makefile targets call it, not easy to disable
                // so just fill as if no interleave
                for (size_t i = 0; i < NumNodes; i++) {
                    ui[i].later_flip = OuterActn;
                }
            }
        }

    }
  #endif

  #ifndef DISABLE_ITERS
    #define ITERS_SAVE(lastInsertedElemPtr, lastInsertedIter) (lastInsertedElemPtr)->selfListed = lastInsertedIter
  #endif

    BFX_INIT(B_UserItem, lst);

    bobs_init(NumNodes);

    // BOP_ADDFOO(lst, iters, insNo, item)
    // BOP_REMFOO(lst, iters, remNo)
    //   lst    lvalue of the list head being added to / removed from
    //   iters  array of ADD return values, in logical-insert order; on ADD, is valid at [0..insNo)
    //   insNo  interleave-perterbed counter of the ADD calls (logical insert number)
    //   remNo  interleave-perterbed counter of the REM calls (logical remove number)
    //   item   lvalue of the item being added
    // Logical insert number 0 and remove number n-1 are given with a distinguished hook.
    // Logical insert numbers [1,n) and remove numbers [0,n-1) are pumped by the basic SUT hooks.
    // This pattern lets BOP cartridges that measure element-level operations know statically when there is a reference element in the list.
    // The driver owns the relationship between a logical insert number and the location of the `item` argument within `ui`.  (Scattered for randomness.)
    // The BOP cartridge owns the relationship between logical remove number and any choice of an item in iters.  (Defines stack vs queue.)

    // Default init/teardown is insert/remove
    // Cartridges whose SUT insert/remove actions work on empty lists need not provide special-case ones.
    #ifndef BOP_INIT
    #define BOP_INIT(lst, item) BOP_INSERT(lst, ERROR_REQD_ITER_NOT_PROVIDED, item)
    #endif
    #ifndef BOP_TEARDOWN
    #define BOP_TEARDOWN(lst) BOP_REMOVE(lst, ERROR_REQD_ITER_NOT_PROVIDED)
    #endif


    size_t privateOpsCompleted = 0;

    double elapsed_sec = 0;
    clock_t start = clock();

    while (elapsed_sec <= (double) ExperimentDurSec && privateOpsCompleted < ExperimentDurOpCount) {
        for ( size_t t = 0; t < CheckDonePeriod; t += 1 ) {
            TRACE('a')              // insert special first
            ITERS_SAVE( &ui[0],
                BOP_INIT(lst, ui[0]) );
            TRACE('b')              // insert general
            // Keep lastInserted even on DISABLE_ITERS because it's communication to the remove phase
            BFX_LISTED_ELEM_T(B_UserItem) lastInserted = BFX_GET_FIRST(B_UserItem, lst);
            B_UserItem * insertNow = seekUi(1);
            for ( size_t privateCurInsert = 1;
                  (bobs_prog_inserting = privateCurInsert, privateCurInsert < NumNodes);
                  privateCurInsert += 1
                ) {
                assert( insertNow->self_ord == privateCurInsert );
                TRACE('-')
                lastInserted =
                    BOP_INSERT( lst, lastInserted, (*insertNow) );
                TRACE('+')
                ITERS_SAVE( insertNow, lastInserted );
              #ifdef DISABLE_SHUFFLING_INDIRECTION
                insertNow++;
              #else
                insertNow = & ui[ insertNow->succ_pos ];
              #endif
            }
          #ifdef DISABLE_INTERLEAVING
            // interleaving off, simple removes
            // (remove logic of 2b01f8eb0956)
          #ifndef DISABLE_ITERS
            BFX_LISTED_ELEM_T(B_UserItem) nextRemoval = BOP_SWITCH_REMDIR (
                ui[0].selfListed,  // forward starts from first insert
                lastInserted       // backward starts from last insert
            );
          #endif
            TRACE('c')
            for ( size_t privateCurRemove = 1;
                  (bobs_prog_removing = privateCurRemove, privateCurRemove < NumNodes);
                  privateCurRemove += 1
                ) {
              #ifdef DISABLE_ITERS
                #define curRemovalI ERROR_REQD_ITER_NOT_PROVIDED
              #else
                BFX_LISTED_ELEM_T(B_UserItem) curRemovalI = nextRemoval;
                B_UserItem * curRemovalE = BFX_DEREF_POS(B_UserItem, lst, nextRemoval);
               #ifdef DISABLE_SHUFFLING_INDIRECTION
                nextRemoval = (BOP_SWITCH_REMDIR( curRemovalE + 1, curRemovalE - 1 ))->selfListed;
               #else
                nextRemoval = ui[ curRemovalE->BOP_SWITCH_REMDIR(succ_pos, pred_pos) ].selfListed;
               #endif
              #endif
                TRACE('-')
                BOP_REMOVE( lst, curRemovalI );
                TRACE('+')
            }
          #else
            // interleaving on, complex removes
            TRACE('c')              // remove general
            size_t removeProgress  [N_INTERLEAVE_ACTION] = { 0, 0 };
            #define startItem BOP_SWITCH_REMDIR( iactFwdStartItem, iactRevStartItem )
            B_UserItem * removeItem[N_INTERLEAVE_ACTION] = 
                { startItem[OuterActn], startItem[InnerActn] };
            for ( InterleaveAction flip = OuterActn
                ; (bobs_prog_removing = removeProgress[0] + removeProgress[1] + 1,
                     bobs_prog_removing_end = removeProgress[0] + 1,
                     removeProgress[0] + removeProgress[1] < NumNodes - 1 )
                ;
                ) {
//printf("--- flip=%d removeProgress[0]=%zd removeProgress[1]=%zd removeItem[flip]->self_ord=%zd\n", flip, removeProgress[0], removeProgress[1], removeItem[flip]->self_ord);
                TRACE('-')
                BOP_REMOVE( lst, removeItem[flip]->selfListed );
                TRACE('+')

                InterleaveAction nextFlip = removeItem[flip]->later_flip;
                removeProgress[flip] += 1;
                removeItem[flip] = & ui[ removeItem[flip]-> BOP_SWITCH_REMDIR(succ_pos, pred_pos) ];
                flip = nextFlip;
            }

//             for ( InterleaveAction flip = OuterActn;
//                   (bobs_prog_removing = removeProgress[0] + removeProgress[1] + 1,
//                      bobs_prog_removing_end = removeProgress[0] + 1,
//                      removeProgress[0] < numNodesItlv[0] && removeProgress[1] < numNodesItlv[1] );
//                 ) {
//                 TRACE('-')
//                 BOP_REMOVE( lst, removeItem[flip]->selfListed );
//                 TRACE('+')

//                 InterleaveAction nextFlip = removeItem[flip]->later_flip;
//                 removeProgress[flip] += 1;
//                 removeItem[flip] = & ui[ removeItem[flip]-> BOP_SWITCH_REMDIR(succ_pos, pred_pos) ];
//                 flip = nextFlip;
//             }
//             TRACE('X')              // remove imbalanced
//             // most work done under general; it stops when either flip-side's work finishes
//             // now drain any any stragglers so both flip-sides' work finishes
//             for ( InterleaveAction flip = 0; flip < N_INTERLEAVE_ACTION; flip ++ ) {
//                 for ( ; (bobs_prog_removing = removeProgress[0] + removeProgress[1] + 1,
//                          bobs_prog_removing_end = removeProgress[0] + 1,
//                          removeProgress[flip] < numNodesItlv[flip] )
//                       ;
//                     ) {
// //printf("--- flip=%d removeProgress[flip]=%zd numNodesItlv[flip]=%zd removeItem[flip]->self_ord=%zd\n", flip, removeProgress[flip], numNodesItlv[flip], removeItem[flip]->self_ord);
//                     TRACE('-')
//                     BOP_REMOVE( lst, removeItem[flip]->selfListed );
//                     TRACE('+')

//                     removeProgress[flip] += 1;
//                     removeItem[flip] = & ui[ removeItem[flip]-> BOP_SWITCH_REMDIR(succ_pos, pred_pos) ];
//                 }
//             }
          #endif // DISABLE_INTERLEAVING
            TRACE('D')              // remove special last
            BOP_TEARDOWN(lst);
            TRACE('d')

            privateOpsCompleted += NumNodes;

            bobs_prog_rollover_flag = 1;
            TRACE('e')
            bobs_prog_inserting = 0;
            bobs_prog_removing = 0;
            bobs_prog_removing_end = 0;
            bobs_ops_completed = privateOpsCompleted;
            TRACE('f')
            bobs_prog_rollover_flag = 0;
            TRACE('g')
        }
      #ifndef DISABLE_CLOCK_RECHECK
        clock_t end = clock();
        elapsed_sec = ((double)(end - start)) / ((double)CLOCKS_PER_SEC);
      #endif
    }
    #ifdef DISABLE_CLOCK_RECHECK
    {
        clock_t end = clock();
        elapsed_sec = ((double)(end - start)) / ((double)CLOCKS_PER_SEC);
    }
    #endif

    double mean_op_dur_ns = elapsed_sec / ((double)bobs_ops_completed) * 1000 * 1000 * 1000;
    printf("%s,%zd,%f,%f\n", argv[0], bobs_ops_completed, elapsed_sec, mean_op_dur_ns);

    free(ui);
}
