Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/baseline.cfa
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/baseline.cfa	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/baseline.cfa	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
@@ -0,0 +1,68 @@
+// Baseline mutex benchmark: each worker thread repeatedly locks and unlocks
+// a single global spinlock for 10s; the aggregate count is printed at exit.
+#include <locks.hfa>
+#include <mutex_stmt.hfa>
+#include <stdio.h>
+
+#include "../bench.h"
+
+test_spinlock LOCKS;
+
+// 2-, 4- and 8-lock overloads acquire/release in argument order. They are
+// 'static inline' (matching ?{} below): plain C 'inline' emits no external
+// definition, which breaks linking whenever a call is not inlined.
+static inline void lock( test_spinlock &a, test_spinlock &b ) {
+    lock(a); lock(b);
+}
+static inline void lock( test_spinlock &a, test_spinlock &b, test_spinlock &c, test_spinlock &d ) {
+    lock(a); lock(b); lock(c); lock(d);
+}
+static inline void lock( test_spinlock &a, test_spinlock &b, test_spinlock &c, test_spinlock &d, test_spinlock &e, test_spinlock &f, test_spinlock &g, test_spinlock &h ) {
+    lock(a); lock(b); lock(c); lock(d); lock(e); lock(f); lock(g); lock(h);
+}
+static inline void unlock( test_spinlock &a, test_spinlock &b ) {
+    unlock(a); unlock(b);
+}
+static inline void unlock( test_spinlock &a, test_spinlock &b, test_spinlock &c, test_spinlock &d ) {
+    unlock(a); unlock(b); unlock(c); unlock(d);
+}
+static inline void unlock( test_spinlock &a, test_spinlock &b, test_spinlock &c, test_spinlock &d, test_spinlock &e, test_spinlock &f, test_spinlock &g, test_spinlock &h ) {
+    unlock(a); unlock(b); unlock(c); unlock(d); unlock(e); unlock(f); unlock(g); unlock(h);
+}
+
+bool done = false;   // termination flag, raised once by main after the timed run
+uint64_t total = 0;  // aggregate count; accumulation is handled inside BENCH (bench.h)
+thread worker {};
+static inline void ?{}( worker & this, cluster & clu ) {
+    ((thread &)this){ clu };
+}
+// Worker body: spin on lock/unlock of the single global lock until 'done'.
+void main( worker & w ) {
+    BENCH( lock( LOCKS ); unlock( LOCKS );, total, done )
+}
+
+int main( int argc, char * argv[] ) {
+	BENCH_START()
+    cluster clus;
+    processor * proc[threads];
+    for ( i; threads ) // create procs
+        (*(proc[i] = alloc())){clus};
+
+    worker * w[threads];
+    for ( i; threads ) // create threads
+        (*(w[i] = alloc())){ clus };
+    
+    sleep( 10`s );
+    done = true;
+
+    for ( i; threads ) // delete threads (delete joins each worker)
+        delete(w[i]);
+
+    for ( i; threads ) // delete procs
+        delete(proc[i]);
+	printf( "%lu\n", total );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/rand.cfa
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/rand.cfa	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/rand.cfa	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
@@ -0,0 +1,126 @@
+// Random-order mutex-statement benchmark: each worker acquires num_locks
+// (2, 4, or 8) locks in pre-generated random orders via the mutex statement.
+#include <locks.hfa>
+#include <mutex_stmt.hfa>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../bench.h"
+
+test_spinlock LOCKS;
+
+test_spinlock ** lock_arr;
+
+// Acquire/release all locks named by 'arr' (indices into lock_arr) with a
+// single mutex statement. 'static' added: plain C 'inline' emits no external
+// definition, which breaks linking whenever a call is not inlined.
+static inline void locks( size_t * arr ) {
+    if (num_locks == 2) {
+        mutex( *lock_arr[arr[0]], *lock_arr[arr[1]] ) {}
+    } else if (num_locks == 4) {
+        mutex( *lock_arr[arr[0]], *lock_arr[arr[1]], *lock_arr[arr[2]], *lock_arr[arr[3]] ) {}
+    } else if (num_locks == 8) {
+        mutex( *lock_arr[arr[0]], *lock_arr[arr[1]], *lock_arr[arr[2]], *lock_arr[arr[3]], *lock_arr[arr[4]], *lock_arr[arr[5]], *lock_arr[arr[6]], *lock_arr[arr[7]] ) {}
+    }
+}
+
+bool done = false;
+uint64_t total = 0;
+size_t num_gen = 100; // number of rand orderings per thd
+size_t ** rand_arrs;
+
+// generate repeatable orderings for each experiment
+void gen_orders() {
+    rand_arrs = aalloc( threads );
+    for ( i; threads )
+        rand_arrs[i] = aalloc( num_locks * num_gen );
+
+    size_t work_arr[num_locks];
+
+    for ( i; num_locks )
+        work_arr[i] = i;
+
+    size_t curr_idx;
+    for ( i; threads ) {
+        state = i; // seed PRNG per thread so orderings are reproducible
+        curr_idx = 0;
+        for ( j; num_gen ) {
+            // partial Fisher-Yates: draw without replacement from work_arr
+            for ( size_t k = num_locks; k > 0; k-- ) {
+                size_t rand_idx = next_int() % k; // choose one of remaining elems in work_arr
+                rand_arrs[i][curr_idx] = work_arr[rand_idx];
+                curr_idx++;
+
+                // swap chosen elem to end so it isn't picked again
+                size_t temp = work_arr[rand_idx];
+                work_arr[rand_idx] = work_arr[k - 1];
+                work_arr[k - 1] = temp;
+            }
+        }
+        
+    }
+}
+
+thread worker { size_t * my_arr; };
+static inline void ?{}( worker & this, cluster & clu, size_t id ) {
+    ((thread &)this){ clu };
+    this.my_arr = rand_arrs[id];
+}
+
+void main( worker & w ) with(w) {
+    uint64_t count = 0;
+    while (true) {
+        locks( my_arr + (count % num_gen) * num_locks );
+        count++;
+        if (done) break;
+    }
+    __atomic_add_fetch(&total, count, __ATOMIC_SEQ_CST);
+}
+    
+int main( int argc, char * argv[] ) {
+	BENCH_START()
+    if ( num_locks == -1 ) { printf("must pass # of locks to program!\n"); exit( EXIT_FAILURE ); }
+    cluster clus;
+    processor * proc[threads];
+    for ( i; threads ) // create procs
+        (*(proc[i] = alloc())){clus};
+
+    lock_arr = aalloc( num_locks );
+
+    if (num_locks >= 2) {
+        lock_arr[0] = &l1; lock_arr[1] = &l2;
+    }
+    if (num_locks >= 4) {
+        lock_arr[2] = &l3; lock_arr[3] = &l4;
+    }
+    if (num_locks == 8) {
+        lock_arr[4] = &l5; lock_arr[5] = &l6; lock_arr[6] = &l7; lock_arr[7] = &l8;
+    }
+
+    gen_orders();
+
+    worker * w[threads];
+    for ( i; threads ) // create threads
+        (*(w[i] = alloc())){ clus, i };
+    
+    sleep( 10`s );
+    done = true;
+
+    for ( i; threads ) // delete threads
+        delete(w[i]);
+
+    for ( i; threads ) // delete procs
+        delete(proc[i]);
+
+    for ( i; threads )
+        adelete(rand_arrs[i]);
+    adelete(rand_arrs);
+
+    adelete(lock_arr);
+    
+	printf( "%lu\n", total );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/baseline.cc
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/baseline.cc	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/baseline.cc	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
@@ -0,0 +1,42 @@
+// Baseline mutex benchmark (C++): each thread repeatedly locks/unlocks a
+// single global spinlock; after 10s the aggregate count is printed.
+#include <cstdio>
+#include <cinttypes>
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include <chrono>
+#include "../bench.h"
+#include "cppLock.hpp"
+
+cpp_test_spinlock LOCKS;
+
+// atomic: 'done' is written by main and read concurrently by every worker;
+// a plain-bool data race is undefined behaviour in C++.
+std::atomic<bool> done{ false };
+uint64_t total = 0; // aggregate count; assumes BENCH (bench.h) accumulates atomically
+void thread_main() {
+    BENCH( lock( LOCKS ); unlock( LOCKS );, total, done )
+}
+
+int main( int argc, char * argv[] ) {
+	BENCH_START()
+    std::thread myThreads[threads];
+
+    for (int i = 0; i < threads; i++) {
+        myThreads[i] = std::thread(thread_main); // move constructed
+    }
+
+    std::this_thread::sleep_for (std::chrono::seconds(10));
+    done = true;
+    
+    for (int i = 0; i < threads; i++) {
+        myThreads[i].join();
+    }
+
+	printf( "%" PRIu64 "\n", total ); // PRIu64: %lu mismatches uint64_t off LP64
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/rand.cc
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/rand.cc	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/rand.cc	(revision 0da718119b2ce64d4fe7e641a9d6cdfd960b073d)
@@ -0,0 +1,119 @@
+// Random-order lock benchmark (C++): each thread acquires num_locks (2/4/8)
+// locks in pre-generated random orders for one critical section at a time.
+#include <cstdio>
+#include <cinttypes>
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include <chrono>
+#include <stdlib.h>
+#include "cppLock.hpp"
+
+#include "../bench.h"
+
+cpp_test_spinlock LOCKS;
+cpp_test_spinlock  ** lock_arr;
+
+// Acquire all locks named by 'arr' (indices into lock_arr); std::scoped_lock
+// uses std::lock's deadlock-avoidance algorithm for multiple mutexes.
+inline void locks( size_t * arr ) {
+    if (num_locks == 2) {
+        std::scoped_lock lock( *lock_arr[arr[0]], *lock_arr[arr[1]] );
+    } else if (num_locks == 4) {
+        std::scoped_lock lock( *lock_arr[arr[0]], *lock_arr[arr[1]], *lock_arr[arr[2]], *lock_arr[arr[3]] );
+    } else if (num_locks == 8) {
+        std::scoped_lock lock( *lock_arr[arr[0]], *lock_arr[arr[1]], *lock_arr[arr[2]], *lock_arr[arr[3]], *lock_arr[arr[4]], *lock_arr[arr[5]], *lock_arr[arr[6]], *lock_arr[arr[7]] );
+    }
+}
+
+// atomic: 'done' is written by main and read concurrently by every worker;
+// a plain-bool data race is undefined behaviour in C++.
+std::atomic<bool> done{ false };
+uint64_t total = 0;
+size_t num_gen = 100; // number of rand orderings per thd
+size_t ** rand_arrs;
+
+// generate repeatable orderings for each experiment
+void gen_orders() {
+    rand_arrs = new size_t *[threads];
+    for ( int i = 0; i < threads; i++ )
+        rand_arrs[i] = new size_t[ num_locks * num_gen ];
+
+    size_t work_arr[num_locks];
+
+    for ( int i = 0; i < num_locks; i++ )
+        work_arr[i] = i;
+
+    size_t curr_idx;
+    for ( int i = 0; i < threads; i++ ) {
+        state = i; // seed PRNG per thread so orderings are reproducible
+        curr_idx = 0;
+        for ( size_t j = 0; j < num_gen; j++ ) { // size_t: num_gen is unsigned
+            // partial Fisher-Yates: draw without replacement from work_arr
+            for ( size_t k = num_locks; k > 0; k-- ) {
+                size_t rand_idx = next_int() % k; // choose one of remaining elems in work_arr
+                rand_arrs[i][curr_idx] = work_arr[rand_idx];
+                curr_idx++;
+
+                // swap chosen elem to end so it isn't picked again
+                size_t temp = work_arr[rand_idx];
+                work_arr[rand_idx] = work_arr[k - 1];
+                work_arr[k - 1] = temp;
+            }
+        }
+        
+    }
+}
+
+void thread_main( int id ) {
+    size_t * my_arr = rand_arrs[id];
+    uint64_t count = 0;
+    while (true) {
+        locks( my_arr + (count % num_gen) * num_locks );
+        count++;
+        if (done) break;
+    }
+    __atomic_add_fetch(&total, count, __ATOMIC_SEQ_CST);
+}
+
+int main( int argc, char * argv[] ) {
+	BENCH_START()
+    if ( num_locks == -1 ) { printf("must pass # of locks to program!\n"); exit( EXIT_FAILURE ); }
+    
+    lock_arr = new cpp_test_spinlock *[ num_locks ];
+
+    if (num_locks >= 2) {
+        lock_arr[0] = &l1; lock_arr[1] = &l2;
+    }
+    if (num_locks >= 4) {
+        lock_arr[2] = &l3; lock_arr[3] = &l4;
+    }
+    if (num_locks == 8) {
+        lock_arr[4] = &l5; lock_arr[5] = &l6; lock_arr[6] = &l7; lock_arr[7] = &l8;
+    }
+
+    gen_orders();
+
+    std::thread myThreads[threads];
+    for (int i = 0; i < threads; i++) {
+        myThreads[i] = std::thread(thread_main, i); // move constructed
+    }
+
+    std::this_thread::sleep_for (std::chrono::seconds(10));
+    done = true;
+    
+    for (int i = 0; i < threads; i++) {
+        myThreads[i].join();
+    }
+
+    for ( int i = 0; i < threads; i++ )
+        delete[] rand_arrs[i];
+    delete[] rand_arrs;
+    delete[] lock_arr;
+
+	printf( "%" PRIu64 "\n", total ); // PRIu64: %lu mismatches uint64_t off LP64
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
