Index: tests/unified_locking/thread_test.cfa
===================================================================
--- tests/unified_locking/thread_test.cfa	(revision ee23a8d0b19c3c92187b39ec38ee19086eb2b62d)
+++ tests/unified_locking/thread_test.cfa	(revision ee23a8d0b19c3c92187b39ec38ee19086eb2b62d)
@@ -0,0 +1,85 @@
+#include <stdio.h>
+#include "locks.hfa"
+#include <stdlib.hfa>
+#include <thread.hfa>
+#include <containers/array.hfa>
+
+static unsigned int taskCount = 4;        // number of worker threads main creates (argv[1])
+static unsigned int threadCount = 2;      // length of the processor array declared in main (argv[2])
+static unsigned int lockCount = 1;        // modulus for the workers' lock-index draw (argv[3])
+static unsigned int num_times = 10000;    // lock/unlock rounds executed by each worker (argv[4])
+static const int workBufferSize = 16;     // number of ints in each worker's scratch buffer
+static unsigned int work_unlocked = 10000; // dowork steps per round outside the lock
+static unsigned int work_locked = 10000;   // dowork steps per round while holding the lock
+
+// Busy work (taken from martin's thread_test): `steps` rounds of arithmetic over buffer, folded into buffer[0].
+static inline void dowork(volatile int* buffer, unsigned int steps) {
+  unsigned int value = 0;  // unsigned on purpose: buffer[0] feeds back into the sum, and int overflow is UB
+  for (unsigned int i = 0; i < steps; i += 1) {
+    // a little more work than just a single memory access helps with stability
+    value += ((unsigned int)buffer[i % workBufferSize] * 17u) / 23u + 55u;
+  }
+  buffer[0] = (int)((unsigned int)buffer[0] + value);  // one read, one write, wrapping instead of overflowing
+}
+
+thread worker {   // thread type; its body is main( worker & ) in this file
+    linear_backoff_then_block_lock * locks;   // lock this worker acquires each round; set by the constructor
+};
+
+void ?{}( worker & w, linear_backoff_then_block_lock * locks ) {   // constructor: records the lock pointer
+	w.locks = locks;   // pointer is stored as given; the worker never allocates or frees the lock
+}
+
+linear_backoff_then_block_lock norm_lock;   // file-scope lock; no live code in this file acquires it
+
+// Worker body: num_times rounds of unlocked busy work followed by busy work under the lock.
+// All rounds use the one lock passed to the constructor; picking among several is not wired up yet.
+void main( worker & this ) with(this) {
+    int buffer[workBufferSize];
+    for (int i = 0; i < workBufferSize; i += 1) buffer[i] = rand() % 1024;
+    // The lock index is drawn every round but not consumed yet. lockCount can be
+    // overridden from the command line, so never use it as a divisor when it is zero.
+    unsigned int lck = 0;
+    if (lockCount != 0) lck = rand() % lockCount;
+    linear_backoff_then_block_lock * curr_lock = locks;
+    for (unsigned int i = 0; i < num_times; i++) {
+        dowork(buffer, work_unlocked);   // outside the critical section
+        lock(curr_lock);
+        dowork(buffer, work_locked);     // inside the critical section
+        unlock(curr_lock);
+        if (lockCount != 0) lck = rand() % lockCount;
+    }
+}
+
+// Benchmark driver; optional arguments in order: taskCount threadCount lockCount num_times.
+int main(int argc, char* argv[]) {
+    switch (argc) {   // deliberate fall-through: N arguments override the first N settings
+        case 5:
+            num_times = atoi(argv[4]);   // fall through
+        case 4:
+            lockCount = atoi(argv[3]);   // fall through
+        case 3:
+            threadCount = atoi(argv[2]); // fall through
+        case 2:
+            taskCount = atoi(argv[1]);   // fall through
+        default: break;   // no arguments, or more than four: nothing (else) to parse
+    }
+    // the workers use lockCount as a modulus, so it must not be zero
+    if (lockCount == 0) lockCount = 1;
+    processor p[threadCount];
+    linear_backoff_then_block_lock locks;
+    printf("lock allocation address: %p \n", (void *)&locks);
+    worker * worker_arr[taskCount];
+
+    printf("Start Test: martin lock simple\n");
+    clock_t begin = clock();   // clock() reports processor time, not wall-clock time
+    for (unsigned int i = 0; i < taskCount; i++) {
+        worker_arr[i] = new( &locks );
+    }
+    // delete, not free: the worker's destructor has to run (it waits for the thread to
+    // finish) before the storage is released; free would release a still-running thread.
+    for (unsigned int i = 0; i < taskCount; i++) delete( worker_arr[i] );
+    clock_t end = clock();
+    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+    printf("Done Test, time: %f\n", time_spent);
+}
