Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/bench.h
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/bench.h	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/bench.h	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
@@ -0,0 +1,78 @@
+#pragma once
+
+#if defined(__cforall)
+extern "C" {
+#endif
+	#include <stdlib.h>
+	#include <stdint.h>				// uint64_t
+	#include <unistd.h>				// sysconf
+#if ! defined(__cforall)
+	#include <time.h>
+	#include <sys/time.h>
+#else
+}
+#include <time.hfa>
+#endif
+
+#define L1 l1
+#define L2 L1, l2
+#define L3 L2, l3
+#define L4 L3, l4
+#define L5 L4, l5
+#define L6 L5, l6
+#define L7 L6, l7
+#define L8 L7, l8
+
+static inline uint64_t bench_time() {
+	struct timespec ts;
+	clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts );
+	return 1000000000LL * ts.tv_sec + ts.tv_nsec;
+} // bench_time
+
+
+#if defined(__cforall)
+struct test_spinlock {
+	volatile bool lock;
+};
+
+static inline void lock( test_spinlock & this ) {
+	for ( ;; ) {
+		if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
+	}
+}
+
+static inline void unlock( test_spinlock & this ) {
+	__atomic_clear( &this.lock, __ATOMIC_RELEASE );
+}
+#endif
+
+size_t threads = 1;
+
+#define BENCH_START()				\
+	if ( argc > 2 ) exit( EXIT_FAILURE );	\
+	if ( argc == 2 ) {			\
+		threads = atoi( argv[1] );	\
+	}
+
+// Spin executing `statement` until done_flag is set, then fold the local
+// iteration count into `output` with one atomic add (avoids per-iteration contention).
+#define BENCH(statement, output, done_flag)		\
+	uint64_t count = 0;				\
+	while ( true ) {				\
+		statement;				\
+		count++;				\
+		if ( __atomic_load_n( &(done_flag), __ATOMIC_SEQ_CST ) ) break; /* atomic: flag written by another thread */ \
+	}						\
+	__atomic_add_fetch( &(output), count, __ATOMIC_SEQ_CST );
+
+
+#if defined(__cforall)
+Duration default_preemption() {
+	return 0;
+}
+#endif
+#if defined(__U_CPLUSPLUS__)
+unsigned int uDefaultPreemption() {
+	return 0;
+}
+#endif
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/order.cfa
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/order.cfa	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cfa/order.cfa	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
@@ -0,0 +1,29 @@
+#include <locks.hfa>
+#include <mutex_stmt.hfa>
+#include <stdio.h>
+
+#include "../bench.h"
+
+test_spinlock LOCKS;
+
+bool done = false;
+uint64_t total = 0;
+thread worker {};
+void main( worker & w ) {
+    BENCH( mutex ( LOCKS ) { }, total, done )
+}
+
+int main( int argc, char * argv[] ) {
+	BENCH_START()
+    processor p[threads]; // one processor per worker; the runtime's default processor runs main
+    {
+        worker w[threads]; // workers run until done is set; block scope joins them
+        sleep( 10`s );
+        __atomic_store_n( &done, true, __ATOMIC_SEQ_CST ); // atomic: polled concurrently by workers
+    }
+	printf( "%lu\n", total );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/cppLock.hpp
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/cppLock.hpp	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/cppLock.hpp	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
@@ -0,0 +1,18 @@
+class cpp_test_spinlock {
+	volatile bool lockBool = 0;
+
+  public:
+	inline void lock() {
+		for ( ;; ) {
+			if ( (this->lockBool == 0) && (__atomic_test_and_set( &this->lockBool, __ATOMIC_ACQUIRE ) == 0) ) break;
+		}
+	}
+
+	inline bool try_lock() {
+		return (this->lockBool == 0) && (__atomic_test_and_set( &this->lockBool, __ATOMIC_ACQUIRE ) == 0);
+	}
+
+	inline void unlock() {
+		__atomic_clear( &this->lockBool, __ATOMIC_RELEASE );
+	}
+};
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/order.cc
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/order.cc	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/cpp/order.cc	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
@@ -0,0 +1,36 @@
+#include <cstdio>
+#include <mutex>
+#include <thread>
+#include <chrono>
+#include "../bench.h"
+#include "cppLock.hpp"
+
+cpp_test_spinlock LOCKS;
+
+bool done = false;
+uint64_t total = 0;
+void thread_main() {
+    BENCH( std::scoped_lock lock( LOCKS );, total, done )
+}
+
+int main( int argc, char * argv[] ) {
+	BENCH_START()
+    std::thread myThreads[threads]; // runtime-length array: g++ extension, not standard C++
+
+    for (size_t i = 0; i < threads; i++) { // size_t: matches type of threads, avoids signed/unsigned compare
+        myThreads[i] = std::thread(thread_main); // move assigned
+    }
+
+    std::this_thread::sleep_for (std::chrono::seconds(10));
+    __atomic_store_n( &done, true, __ATOMIC_SEQ_CST ); // atomic: polled concurrently by workers
+
+    for (size_t i = 0; i < threads; i++) {
+        myThreads[i].join();
+    }
+
+	printf( "%lu\n", total );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/run
===================================================================
--- doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/run	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
+++ doc/theses/colby_parsons_MMAth/benchmarks/mutex_stmt/run	(revision 6e6989cd4896156a81bb9fdcf13eed2d8e4be9f7)
@@ -0,0 +1,203 @@
+#!/bin/bash -
+
+false=0; true=1
+
+# Usage: arch [ hostname ] returns hostname, cores, startcore
+#
+#   Define machine architecture based on starting socket, CPUs (cores) per socket, number of
+#   sockets, has hyperthreading.
+
+start=0
+
+arch() {
+	hostname=${1:-`hostname`}			# return value
+	hashyper=${true}					# assume machine has hyperthreads
+	if [ "${hostname}" = "plg2" ] ; then
+		startsocket=${start}
+		cps=16							# coresPerSocket
+		sockets=2
+		hashyper=${false}				# has no hyperthreads
+	elif [ "${hostname}" = "nasus" ] ; then
+		startsocket=${start}
+		cps=64							# coresPerSocket
+		sockets=2
+	elif [ "${hostname}" = "pyke" ] ; then
+		startsocket=${start}
+		cps=24							# coresPerSocket
+		sockets=2
+	elif [ "${hostname}" = "jax" ] ; then
+		startsocket=${start}
+		cps=24							# coresPerSocket
+		sockets=4
+	else
+		echo "unsupported host" ${hostname}
+		exit 1
+	fi
+	cores=$(( ${cps} * ${sockets} ))
+	startcore=$(( ${startsocket} * ${cps} ))
+}
+
+# Usage: affinity (global cps, sockets, startsocket, hashyper, cores, startcore, wrap)
+#   returns taskset argument
+#
+#   This routine assumes hyperthreading has only 2 hyperthreads per core.
+#
+#   If hyperthread scanning is used: processor units are assigned across the low-number hyperthreads
+#   of the socket's cores. When the low-number hyperthreads are filled, the high-number hyperthreads
+#   are assigned across the socket's cores. Then the next socket is assigned.
+#
+#   If hyperthread wrapping is used: processor units are assigned in low/high-number pairs of
+#   hyperthreads across the socket's cores. Then the next socket is assigned.
+
+wrap=${false}							# set to control hyperthread assignment across socket cores
+
+affinity() {
+	if [ ${wrap} -eq ${true} -a ${hashyper} -eq ${false} ] ; then
+		echo "architecture does not support hyperthreading for wrapping"
+		exit 1
+	fi
+	taskset=""							# return value
+	set -- $(( ${1} - 1 ))				# decrement $1
+	if [ ${1} -eq 0 ] ; then taskset="${startcore}-${startcore}"; return; fi
+	if [ ${1} -ge $(( ${cps} * ( ${sockets} - ${startsocket} ) * ( ${hashyper} + 1 ) )) ] ; then # error
+		echo "not enough cores $(( ${cps} * ( ${sockets} - ${startsocket} ) * ( ${hashyper} + 1 ) )) for $(( ${1} + 1 )) starting at ${startcore}"
+		exit 1
+	fi
+	if [ ${hashyper} -eq ${false} ] ; then taskset="${startcore}-$(( ${1} + ${startcore} ))"; return; fi # no hyperthreads
+	start2=$(( ${startcore} + ${cores} ))
+	if [ ${wrap} -eq ${true} ] ; then 	# hyperthread wrapping
+		end1=$(( ${1} / 2 + ${startcore} ))
+		end2=$(( ${end1} + ${cores} ))
+		if [ $(( ${1} % 2 )) -eq 0 ] ; then
+			end2=$(( ${end2} - 1 ))
+		fi
+		taskset="${startcore}-${end1},${start2}-${end2}"
+	else								# hyperthread scanning
+		if [ ${1} -lt ${cps} ] ; then taskset="${startcore}-$(( ${1} + ${startcore} ))"; return; fi
+		filled=$(( ${1} / ( ${cps} * 2 ) * ${cps} ))
+		modulus=$(( ${1} % ( ${cps} * 2 ) ))	# leftover cores added to saturated sockets
+		if [ ${modulus} -gt ${cps} ] ; then
+			taskset="${startcore}-$(( ${startcore} + ${filled} + ${cps} - 1 )),${start2}-$(( ${start2} + ${filled} + ${modulus} % ${cps} ))"
+		else
+			taskset="${startcore}-$(( ${startcore} + ${filled} + ${modulus} )),${start2}-$(( ${start2} + ${filled} - 1 ))"
+		fi
+	fi
+}
+
+numtimes=1
+
+# locks=('-DLOCKS=L1' '-DLOCKS=L2' '-DLOCKS=L3' '-DLOCKS=L4' '-DLOCKS=L5' '-DLOCKS=L6' '-DLOCKS=L7' '-DLOCKS=L8')
+# locks='1 2 3 4 5 6 7 8'
+lock_flags=('-DLOCKS=L2' '-DLOCKS=L4' '-DLOCKS=L8')
+locks=('2' '4' '8')
+
+num_threads='2 4 8 16 24 32'
+
+# toggle benchmarks
+rand=${false} # probably not needed
+order=${true}
+
+runCFA=${true}
+runCPP=${true}
+# runCFA=${false}
+# runCPP=${false}
+
+cfa=~/cfa-cc/driver/cfa
+cpp=g++
+
+# Helpers to minimize code duplication
+
+# repeats a command ${numtimes}
+preprint=''
+repeat_command() {
+    t=1
+    while [ ${t} -le ${numtimes} ] ; do
+        echo -n -e ${preprint}
+        "${@}"
+        t=`expr ${t} + 1`
+    done
+}
+
+# prints the leading info for a given run of a variant
+print_header() {
+    echo ${1}':'
+    echo -e "cores\tthroughput (entries)"
+}
+
+# runs the current benchmark with provided args
+# only works for standard-run benchmarks (not Akka)
+# must split into pre and post args to be able to supply val of p
+pre_args=''
+post_args=''
+single_run() {
+    affinity ${1}
+    preprint="${1}\t"
+    repeat_command taskset -c ${taskset} ./a.${hostname} ${pre_args} ${1} ${post_args}
+}
+
+# runs the current bench for all processor vals
+# works for standard benchs that dont need to set a config file (not Akka or CAF)
+run_bench() {
+    for p in ${num_threads} ; do
+        single_run ${p}
+    done
+}
+
+arch # get hostname
+
+# set up leading info for python script
+echo $numtimes
+echo $num_threads
+
+for i in ${!locks[@]}; do
+        echo -n ${locks[$i]}' '
+done
+echo ""
+
+if [ ${runCFA} -eq ${true} ] ; then
+    echo -n 'CFA '
+fi # done CFA
+if [ ${runCPP} -eq ${true} ] ; then
+    echo -n 'CPP '
+fi # done CPP
+echo ""
+
+# done printing header info for output
+
+# cfa flags
+cfa_flags='-quiet -O3 -nodebug -DNDEBUG'
+
+# cpp flags
+cpp_flags='-O3 -std=c++17 -lpthread -pthread -DNDEBUG'
+
+# run the benchmarks
+
+run_order() {
+    echo "order locks: "${1}
+
+    if [ ${runCFA} -eq ${true} ] ; then
+        cd cfa # CFA RUN
+        print_header 'CFA'
+        ${cfa} ${cfa_flags} ${2} order.cfa -o a.${hostname} > /dev/null 2>&1
+        run_bench
+        rm a.${hostname}
+        cd - > /dev/null
+    fi # done CFA
+
+    if [ ${runCPP} -eq ${true} ] ; then
+        cd cpp # CPP RUN
+        print_header 'CPP'
+        ${cpp} ${cpp_flags} ${2} order.cc -o a.${hostname} > /dev/null 2>&1
+        run_bench
+        rm a.${hostname}
+        cd - > /dev/null
+    fi # done CPP
+}
+
+# /usr/bin/time -f "%Uu %Ss %Er %Mkb"
+if [ ${order} -eq ${true} ] ; then
+    for i in ${!locks[@]}; do
+        run_order ${locks[$i]} ${lock_flags[$i]}
+    done
+fi
+
