Index: libcfa/src/bits/algorithm.hfa
===================================================================
--- libcfa/src/bits/algorithm.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/algorithm.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,191 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/algorithm.hfa -- Small-array sorting helpers.
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Oct 30 13:37:34 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+//
+
+#pragma once
+
+#ifdef SAFE_SORT
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort2( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort3( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort4( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort5( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort6( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sortN( T * arr, size_t dim );
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort( T * arr, size_t dim ) {
+	switch( dim ) {
+		case 1 : return;
+		case 2 : __libcfa_small_sort2( arr ); return;
+		case 3 : __libcfa_small_sort3( arr ); return;
+		case 4 : __libcfa_small_sort4( arr ); return;
+		case 5 : __libcfa_small_sort5( arr ); return;
+		case 6 : __libcfa_small_sort6( arr ); return;
+		default: __libcfa_small_sortN( arr, dim ); return;
+	}
+}
+
+#define min(x, y) (y > x ? x : y) // NOTE(review): arguments are unparenthesized and may be evaluated more than once
+#define max(x, y) (y > x ? y : x) // NOTE(review): same caveat as min; only used through SWAP in this file
+#define SWAP(x,y) { T a = min(arr[x], arr[y]); T b = max(arr[x], arr[y]); arr[x] = a; arr[y] = b;} // compare-exchange: arr[x] ends up with the smaller value, arr[y] with the larger
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort2( T * arr ) {
+	SWAP(0, 1);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort3( T * arr ) {
+	SWAP(1, 2);
+	SWAP(0, 2);
+	SWAP(0, 1);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort4( T * arr ) {
+	SWAP(0, 1);
+	SWAP(2, 3);
+	SWAP(0, 2);
+	SWAP(1, 3);
+	SWAP(1, 2);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort5( T * arr ) {
+	SWAP(0, 1);
+	SWAP(3, 4);
+	SWAP(2, 4);
+	SWAP(2, 3);
+	SWAP(0, 3);
+	SWAP(0, 2);
+	SWAP(1, 4);
+	SWAP(1, 3);
+	SWAP(1, 2);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort6( T * arr ) {
+	SWAP(1, 2);
+	SWAP(4, 5);
+	SWAP(0, 2);
+	SWAP(3, 5);
+	SWAP(0, 1);
+	SWAP(3, 4);
+	SWAP(1, 4);
+	SWAP(0, 3);
+	SWAP(2, 5);
+	SWAP(1, 3);
+	SWAP(2, 4);
+	SWAP(2, 3);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sortN( T * arr, size_t dim ) { // insertion sort of arr[0..dim), ascending according to ?<?
+	int i, j; // NOTE(review): signed indices are compared against the size_t dim below
+	for (i = 1; i < dim; i++) {
+		T tmp = arr[i]; // element being inserted into the already-sorted prefix arr[0..i)
+		for (j = i; j >= 1 && tmp < arr[j-1]; j--) {
+			arr[j] = arr[j-1]; // shift each larger element one slot to the right
+		}
+		arr[j] = tmp; // drop tmp into the gap that was opened
+	}
+}
+
+#else
+
+static inline void __libcfa_small_sort2( void* * arr );
+static inline void __libcfa_small_sort3( void* * arr );
+static inline void __libcfa_small_sort4( void* * arr );
+static inline void __libcfa_small_sort5( void* * arr );
+static inline void __libcfa_small_sort6( void* * arr );
+static inline void __libcfa_small_sortN( void* * arr, size_t dim );
+
+forall( dtype T )
+static inline void __libcfa_small_sort( T* * arr, size_t dim ) { // sorts an array of dim pointers by dispatching on dim
+	switch( dim ) {
+		case 1 : return; // a single element is already sorted
+		case 2 : __libcfa_small_sort2( (void **) arr ); return;
+		case 3 : __libcfa_small_sort3( (void **) arr ); return;
+		case 4 : __libcfa_small_sort4( (void **) arr ); return;
+		case 5 : __libcfa_small_sort5( (void **) arr ); return;
+		case 6 : __libcfa_small_sort6( (void **) arr ); return;
+		default: __libcfa_small_sortN( (void **) arr, dim ); return; // reached for dim == 0 and for dim > 6
+	}
+}
+
+#define min(x, y) (y > x ? x : y) // NOTE(review): arguments are unparenthesized and may be evaluated more than once
+#define max(x, y) (y > x ? y : x) // NOTE(review): same caveat as min; only used through SWAP in this file
+#define SWAP(x,y) { void* a = min(arr[x], arr[y]); void* b = max(arr[x], arr[y]); arr[x] = a; arr[y] = b;} // compare-exchange on pointer values: arr[x] ends up with the lower address, arr[y] with the higher
+
+static inline void __libcfa_small_sort2( void* * arr ) {
+	SWAP(0, 1);
+}
+
+static inline void __libcfa_small_sort3( void* * arr ) {
+	SWAP(1, 2);
+	SWAP(0, 2);
+	SWAP(0, 1);
+}
+
+static inline void __libcfa_small_sort4( void* * arr ) {
+	SWAP(0, 1);
+	SWAP(2, 3);
+	SWAP(0, 2);
+	SWAP(1, 3);
+	SWAP(1, 2);
+}
+
+static inline void __libcfa_small_sort5( void* * arr ) {
+	SWAP(0, 1);
+	SWAP(3, 4);
+	SWAP(2, 4);
+	SWAP(2, 3);
+	SWAP(0, 3);
+	SWAP(0, 2);
+	SWAP(1, 4);
+	SWAP(1, 3);
+	SWAP(1, 2);
+}
+
+static inline void __libcfa_small_sort6( void* * arr ) {
+	SWAP(1, 2);
+	SWAP(4, 5);
+	SWAP(0, 2);
+	SWAP(3, 5);
+	SWAP(0, 1);
+	SWAP(3, 4);
+	SWAP(1, 4);
+	SWAP(0, 3);
+	SWAP(2, 5);
+	SWAP(1, 3);
+	SWAP(2, 4);
+	SWAP(2, 3);
+}
+
+static inline void __libcfa_small_sortN( void* * arr, size_t dim ) { // insertion sort of dim pointers, ascending by pointer value
+	int i, j; // NOTE(review): signed indices are compared against the size_t dim below
+	for (i = 1; i < dim; i++) {
+		void* tmp = arr[i]; // pointer being inserted into the already-sorted prefix arr[0..i)
+		for (j = i; j >= 1 && tmp < arr[j-1]; j--) {
+			arr[j] = arr[j-1]; // shift each larger pointer one slot to the right
+		}
+		arr[j] = tmp; // drop tmp into the gap that was opened
+	}
+}
+
+#endif
+
+#undef SWAP
+#undef min
+#undef max
Index: libcfa/src/bits/algorithms.h
===================================================================
--- libcfa/src/bits/algorithms.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,191 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// bits/algorithms.h -- Builtins for exception handling.
-//
-// Author           : Thierry Delisle
-// Created On       : Mon Oct 30 13:37:34 2017
-// Last Modified By : --
-// Last Modified On : --
-// Update Count     : 0
-//
-
-#pragma once
-
-#ifdef SAFE_SORT
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort2( T * arr );
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort3( T * arr );
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort4( T * arr );
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort5( T * arr );
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort6( T * arr );
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sortN( T * arr, size_t dim );
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sort( T * arr, size_t dim ) {
-	switch( dim ) {
-		case 1 : return;
-		case 2 : __libcfa_small_sort2( arr ); return;
-		case 3 : __libcfa_small_sort3( arr ); return;
-		case 4 : __libcfa_small_sort4( arr ); return;
-		case 5 : __libcfa_small_sort5( arr ); return;
-		case 6 : __libcfa_small_sort6( arr ); return;
-		default: __libcfa_small_sortN( arr, dim ); return;
-	}
-}
-
-#define min(x, y) (y > x ? x : y)
-#define max(x, y) (y > x ? y : x)
-#define SWAP(x,y) { T a = min(arr[x], arr[y]); T b = max(arr[x], arr[y]); arr[x] = a; arr[y] = b;}
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sort2( T * arr ) {
-	SWAP(0, 1);
-}
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sort3( T * arr ) {
-	SWAP(1, 2);
-	SWAP(0, 2);
-	SWAP(0, 1);
-}
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sort4( T * arr ) {
-	SWAP(0, 1);
-	SWAP(2, 3);
-	SWAP(0, 2);
-	SWAP(1, 3);
-	SWAP(1, 2);
-}
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sort5( T * arr ) {
-	SWAP(0, 1);
-	SWAP(3, 4);
-	SWAP(2, 4);
-	SWAP(2, 3);
-	SWAP(0, 3);
-	SWAP(0, 2);
-	SWAP(1, 4);
-	SWAP(1, 3);
-	SWAP(1, 2);
-}
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sort6( T * arr ) {
-	SWAP(1, 2);
-	SWAP(4, 5);
-	SWAP(0, 2);
-	SWAP(3, 5);
-	SWAP(0, 1);
-	SWAP(3, 4);
-	SWAP(1, 4);
-	SWAP(0, 3);
-	SWAP(2, 5);
-	SWAP(1, 3);
-	SWAP(2, 4);
-	SWAP(2, 3);
-}
-
-forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
-static inline void __libcfa_small_sortN( T * arr, size_t dim ) {
-	int i, j;
-	for (i = 1; i < dim; i++) {
-		T tmp = arr[i];
-		for (j = i; j >= 1 && tmp < arr[j-1]; j--) {
-			arr[j] = arr[j-1];
-		}
-		arr[j] = tmp;
-	}
-}
-
-#else
-
-static inline void __libcfa_small_sort2( void* * arr );
-static inline void __libcfa_small_sort3( void* * arr );
-static inline void __libcfa_small_sort4( void* * arr );
-static inline void __libcfa_small_sort5( void* * arr );
-static inline void __libcfa_small_sort6( void* * arr );
-static inline void __libcfa_small_sortN( void* * arr, size_t dim );
-
-forall( dtype T )
-static inline void __libcfa_small_sort( T* * arr, size_t dim ) {
-	switch( dim ) {
-		case 1 : return;
-		case 2 : __libcfa_small_sort2( (void **) arr ); return;
-		case 3 : __libcfa_small_sort3( (void **) arr ); return;
-		case 4 : __libcfa_small_sort4( (void **) arr ); return;
-		case 5 : __libcfa_small_sort5( (void **) arr ); return;
-		case 6 : __libcfa_small_sort6( (void **) arr ); return;
-		default: __libcfa_small_sortN( (void **) arr, dim ); return;
-	}
-}
-
-#define min(x, y) (y > x ? x : y)
-#define max(x, y) (y > x ? y : x)
-#define SWAP(x,y) { void* a = min(arr[x], arr[y]); void* b = max(arr[x], arr[y]); arr[x] = a; arr[y] = b;}
-
-static inline void __libcfa_small_sort2( void* * arr ) {
-	SWAP(0, 1);
-}
-
-static inline void __libcfa_small_sort3( void* * arr ) {
-	SWAP(1, 2);
-	SWAP(0, 2);
-	SWAP(0, 1);
-}
-
-static inline void __libcfa_small_sort4( void* * arr ) {
-	SWAP(0, 1);
-	SWAP(2, 3);
-	SWAP(0, 2);
-	SWAP(1, 3);
-	SWAP(1, 2);
-}
-
-static inline void __libcfa_small_sort5( void* * arr ) {
-	SWAP(0, 1);
-	SWAP(3, 4);
-	SWAP(2, 4);
-	SWAP(2, 3);
-	SWAP(0, 3);
-	SWAP(0, 2);
-	SWAP(1, 4);
-	SWAP(1, 3);
-	SWAP(1, 2);
-}
-
-static inline void __libcfa_small_sort6( void* * arr ) {
-	SWAP(1, 2);
-	SWAP(4, 5);
-	SWAP(0, 2);
-	SWAP(3, 5);
-	SWAP(0, 1);
-	SWAP(3, 4);
-	SWAP(1, 4);
-	SWAP(0, 3);
-	SWAP(2, 5);
-	SWAP(1, 3);
-	SWAP(2, 4);
-	SWAP(2, 3);
-}
-
-static inline void __libcfa_small_sortN( void* * arr, size_t dim ) {
-	int i, j;
-	for (i = 1; i < dim; i++) {
-		void* tmp = arr[i];
-		for (j = i; j >= 1 && tmp < arr[j-1]; j--) {
-			arr[j] = arr[j-1];
-		}
-		arr[j] = tmp;
-	}
-}
-
-#endif
-
-#undef SWAP
-#undef min
-#undef max
Index: libcfa/src/bits/align.h
===================================================================
--- libcfa/src/bits/align.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,62 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// align.h --
-//
-// Author           : Thierry Delisle
-// Created On       : Mon Nov 28 12:27:26 2016
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jul 21 23:05:35 2017
-// Update Count     : 2
-//
-// This  library is free  software; you  can redistribute  it and/or  modify it
-// under the terms of the GNU Lesser General Public License as published by the
-// Free Software  Foundation; either  version 2.1 of  the License, or  (at your
-// option) any later version.
-//
-// This library is distributed in the  hope that it will be useful, but WITHOUT
-// ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
-// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
-// for more details.
-//
-// You should  have received a  copy of the  GNU Lesser General  Public License
-// along  with this library.
-//
-
-#pragma once
-
-#include <assert.h>
-#include <stdbool.h>
-
-// Minimum size used to align memory boundaries for memory allocations.
-#define libAlign() (sizeof(double))
-
-// Check for power of 2
-static inline bool libPow2( unsigned long int value ) {
-    // clears all bits below value, rounding value down to the next lower multiple of value
-    return (value & (value - 1ul)) == 0ul;
-} // libPow2
-
-
-// Returns value aligned at the floor of align.
-static inline unsigned long int libFloor( unsigned long int value, unsigned long int align ) {
-    assert( libPow2( align ) );
-    // clears all bits above or equal to align, getting (value % align), the phase of value with regards to align
-    return value & -align;
-} // libFloor
-
-
-// Returns value aligned at the ceiling of align.
-
-static inline unsigned long int libCeiling( unsigned long int value, unsigned long int align ) {
-    assert( libPow2( align ) );
-    // "negate, round down, negate" is the same as round up
-    return -libFloor( -value, align );
-} // uCeiling
-
-// Local Variables: //
-// compile-command: "make install" //
-// End: //
Index: libcfa/src/bits/align.hfa
===================================================================
--- libcfa/src/bits/align.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/align.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,62 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// align.hfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Fri Jul 21 23:05:35 2017
+// Update Count     : 2
+//
+// This  library is free  software; you  can redistribute  it and/or  modify it
+// under the terms of the GNU Lesser General Public License as published by the
+// Free Software  Foundation; either  version 2.1 of  the License, or  (at your
+// option) any later version.
+//
+// This library is distributed in the  hope that it will be useful, but WITHOUT
+// ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+// for more details.
+//
+// You should  have received a  copy of the  GNU Lesser General  Public License
+// along  with this library.
+//
+
+#pragma once
+
+#include <assert.h>
+#include <stdbool.h>
+
+// Minimum size used to align memory boundaries for memory allocations.
+#define libAlign() (sizeof(double))
+
+// Check for power of 2 (NOTE: a value of 0 also passes this test)
+static inline bool libPow2( unsigned long int value ) {
+    // value & (value - 1) clears the lowest set bit, so the result is zero only when at most one bit was set
+    return (value & (value - 1ul)) == 0ul;
+} // libPow2
+
+
+// Returns value rounded down to a multiple of align; align is asserted to satisfy libPow2.
+static inline unsigned long int libFloor( unsigned long int value, unsigned long int align ) {
+    assert( libPow2( align ) );
+    // for a power-of-2 align, -align has every bit at or above align's bit set, so the mask clears the low bits of value
+    return value & -align;
+} // libFloor
+
+
+// Returns value rounded up to a multiple of align; align is asserted to satisfy libPow2.
+
+static inline unsigned long int libCeiling( unsigned long int value, unsigned long int align ) {
+    assert( libPow2( align ) );
+    // "negate, round down, negate" is the same as round up
+    return -libFloor( -value, align );
+} // libCeiling
+
+// Local Variables: //
+// compile-command: "make install" //
+// End: //
Index: libcfa/src/bits/containers.h
===================================================================
--- libcfa/src/bits/containers.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,282 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// bits/containers.h -- Intrusive generic containers.h
-//
-// Author           : Thierry Delisle
-// Created On       : Tue Oct 31 16:38:50 2017
-// Last Modified By : --
-// Last Modified On : --
-// Update Count     : 0
-
-#pragma once
-
-#include "bits/align.h"
-#include "bits/defs.h"
-
-//-----------------------------------------------------------------------------
-// Array
-//-----------------------------------------------------------------------------
-
-#ifdef __cforall
-	forall(dtype T)
-#else
-	#define T void
-#endif
-struct __small_array {
-	T *           data;
-	__lock_size_t size;
-};
-#undef T
-
-#ifdef __cforall
-	#define __small_array_t(T) __small_array(T)
-#else
-	#define __small_array_t(T) struct __small_array
-#endif
-
-#ifdef __cforall
-	// forall(otype T | sized(T))
-	// static inline void ?{}(__small_array(T) & this) {}
-
-	forall(dtype T | sized(T))
-	static inline T& ?[?]( __small_array(T) & this, __lock_size_t idx) {
-		return ((typeof(this.data))this.data)[idx];
-	}
-
-	forall(dtype T | sized(T))
-	static inline T& ?[?]( const __small_array(T) & this, __lock_size_t idx) {
-		return ((typeof(this.data))this.data)[idx];
-	}
-
-	forall(dtype T | sized(T))
-	static inline T* begin( const __small_array(T) & this ) {
-		return ((typeof(this.data))this.data);
-	}
-
-	forall(dtype T | sized(T))
-	static inline T* end( const __small_array(T) & this ) {
-		return ((typeof(this.data))this.data) + this.size;
-	}
-#endif
-
-//-----------------------------------------------------------------------------
-// Node Base
-//-----------------------------------------------------------------------------
-
-#ifdef __cforall
-	trait is_node(dtype T) {
-		T*& get_next( T& );
-	};
-#endif
-
-//-----------------------------------------------------------------------------
-// Stack
-//-----------------------------------------------------------------------------
-#ifdef __cforall
-	forall(dtype TYPE | is_node(TYPE))
-	#define T TYPE
-#else
-	#define T void
-#endif
-struct __stack {
-	T * top;
-};
-#undef T
-
-#ifdef __cforall
-#define __stack_t(T) __stack(T)
-#else
-#define __stack_t(T) struct __stack
-#endif
-
-#ifdef __cforall
-	forall(dtype T | is_node(T))
-	static inline void ?{}( __stack(T) & this ) {
-		(this.top){ NULL };
-	}
-
-	forall(dtype T | is_node(T) | sized(T))
-	static inline void push( __stack(T) & this, T * val ) {
-		verify( !get_next( *val ) );
-		get_next( *val ) = this.top;
-		this.top = val;
-	}
-
-	forall(dtype T | is_node(T) | sized(T))
-	static inline T * pop( __stack(T) & this ) {
-		T * top = this.top;
-		if( top ) {
-			this.top = get_next( *top );
-			get_next( *top ) = NULL;
-		}
-		return top;
-	}
-#endif
-
-//-----------------------------------------------------------------------------
-// Queue
-//-----------------------------------------------------------------------------
-#ifdef __cforall
-	forall(dtype TYPE | is_node(TYPE))
-	#define T TYPE
-#else
-	#define T void
-#endif
-struct __queue {
-	T * head;
-	T ** tail;
-};
-#undef T
-
-#ifdef __cforall
-#define __queue_t(T) __queue(T)
-#else
-#define __queue_t(T) struct __queue
-#endif
-
-#ifdef __cforall
-
-	forall(dtype T | is_node(T))
-	static inline void ?{}( __queue(T) & this ) with( this ) {
-		head{ NULL };
-		tail{ &head };
-	}
-
-	forall(dtype T | is_node(T) | sized(T))
-	static inline void append( __queue(T) & this, T * val ) with( this ) {
-		verify(tail != NULL);
-		*tail = val;
-		tail = &get_next( *val );
-	}
-
-	forall(dtype T | is_node(T) | sized(T))
-	static inline T * pop_head( __queue(T) & this ) {
-		T * head = this.head;
-		if( head ) {
-			this.head = get_next( *head );
-			if( !get_next( *head ) ) {
-				this.tail = &this.head;
-			}
-			get_next( *head ) = NULL;
-		}
-		return head;
-	}
-
-	forall(dtype T | is_node(T) | sized(T))
-	static inline T * remove( __queue(T) & this, T ** it ) with( this ) {
-		T * val = *it;
-		verify( val );
-
-		(*it) = get_next( *val );
-
-		if( tail == &get_next( *val ) ) {
-			tail = it;
-		}
-
-		get_next( *val ) = NULL;
-
-		verify( (head == NULL) == (&head == tail) );
-		verify( *tail == NULL );
-		return val;
-	}
-
-	forall(dtype T | is_node(T))
-	static inline bool ?!=?( __queue(T) & this, zero_t zero ) {
-		return this.head != 0;
-	}
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Doubly Linked List
-//-----------------------------------------------------------------------------
-#ifdef __cforall
-	forall(dtype TYPE | sized(TYPE))
-	#define T TYPE
-	#define __getter_t * [T * & next, T * & prev] ( T & )
-#else
-	typedef void (*__generit_c_getter_t)();
-	#define T void
-	#define __getter_t __generit_c_getter_t
-#endif
-struct __dllist {
-	T * head;
-	__getter_t __get;
-};
-#undef T
-#undef __getter_t
-
-#ifdef __cforall
-#define __dllist_t(T) __dllist(T)
-#else
-#define __dllist_t(T) struct __dllist
-#endif
-
-#ifdef __cforall
-
-	forall(dtype T | sized(T))
-	static inline [void] ?{}( __dllist(T) & this, * [T * & next, T * & prev] ( T & ) __get ) {
-		this.head{ NULL };
-		this.__get = __get;
-	}
-
-	#define next 0
-	#define prev 1
-	forall(dtype T | sized(T))
-	static inline void push_front( __dllist(T) & this, T & node ) with( this ) {
-		verify(__get);
-		if ( head ) {
-			__get( node ).next = head;
-			__get( node ).prev = __get( *head ).prev;
-			// inserted node must be consistent before it is seen
-			// prevent code movement across barrier
-			asm( "" : : : "memory" );
-			__get( *head ).prev = &node;
-			T & _prev = *__get( node ).prev;
-			__get( _prev ).next = &node;
-		}
-		else {
-			__get( node ).next = &node;
-			__get( node ).prev = &node;
-		}
-
-		// prevent code movement across barrier
-		asm( "" : : : "memory" );
-		head = &node;
-	}
-
-	forall(dtype T | sized(T))
-	static inline void remove( __dllist(T) & this, T & node ) with( this ) {
-		verify(__get);
-		if ( &node == head ) {
-			if ( __get( *head ).next == head ) {
-				head = NULL;
-			}
-			else {
-				head = __get( *head ).next;
-			}
-		}
-		__get( *__get( node ).next ).prev = __get( node ).prev;
-		__get( *__get( node ).prev ).next = __get( node ).next;
-		__get( node ).next = NULL;
-		__get( node ).prev = NULL;
-	}
-
-	forall(dtype T | sized(T))
-	static inline bool ?!=?( __dllist(T) & this, zero_t zero ) {
-		return this.head != 0;
-	}
-	#undef next
-	#undef prev
-#endif
-
-//-----------------------------------------------------------------------------
-// Tools
-//-----------------------------------------------------------------------------
-#ifdef __cforall
-
-#endif
Index: libcfa/src/bits/containers.hfa
===================================================================
--- libcfa/src/bits/containers.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/containers.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,282 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/containers.hfa -- Intrusive generic containers
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 16:38:50 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+
+#pragma once
+
+#include "bits/align.hfa"
+#include "bits/defs.hfa"
+
+//-----------------------------------------------------------------------------
+// Array
+//-----------------------------------------------------------------------------
+
+#ifdef __cforall
+	forall(dtype T)
+#else
+	#define T void
+#endif
+struct __small_array {
+	T *           data;
+	__lock_size_t size;
+};
+#undef T
+
+#ifdef __cforall
+	#define __small_array_t(T) __small_array(T)
+#else
+	#define __small_array_t(T) struct __small_array
+#endif
+
+#ifdef __cforall
+	// forall(otype T | sized(T))
+	// static inline void ?{}(__small_array(T) & this) {}
+
+	forall(dtype T | sized(T))
+	static inline T& ?[?]( __small_array(T) & this, __lock_size_t idx) {
+		return ((typeof(this.data))this.data)[idx];
+	}
+
+	forall(dtype T | sized(T))
+	static inline T& ?[?]( const __small_array(T) & this, __lock_size_t idx) {
+		return ((typeof(this.data))this.data)[idx];
+	}
+
+	forall(dtype T | sized(T))
+	static inline T* begin( const __small_array(T) & this ) {
+		return ((typeof(this.data))this.data);
+	}
+
+	forall(dtype T | sized(T))
+	static inline T* end( const __small_array(T) & this ) {
+		return ((typeof(this.data))this.data) + this.size;
+	}
+#endif
+
+//-----------------------------------------------------------------------------
+// Node Base
+//-----------------------------------------------------------------------------
+
+#ifdef __cforall
+	trait is_node(dtype T) {
+		T*& get_next( T& );
+	};
+#endif
+
+//-----------------------------------------------------------------------------
+// Stack
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+	forall(dtype TYPE | is_node(TYPE))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __stack {
+	T * top;
+};
+#undef T
+
+#ifdef __cforall
+#define __stack_t(T) __stack(T)
+#else
+#define __stack_t(T) struct __stack
+#endif
+
+#ifdef __cforall
+	forall(dtype T | is_node(T))
+	static inline void ?{}( __stack(T) & this ) {
+		(this.top){ NULL };
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline void push( __stack(T) & this, T * val ) {
+		verify( !get_next( *val ) );
+		get_next( *val ) = this.top;
+		this.top = val;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline T * pop( __stack(T) & this ) {
+		T * top = this.top;
+		if( top ) {
+			this.top = get_next( *top );
+			get_next( *top ) = NULL;
+		}
+		return top;
+	}
+#endif
+
+//-----------------------------------------------------------------------------
+// Queue
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+	forall(dtype TYPE | is_node(TYPE))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __queue {
+	T * head;
+	T ** tail;
+};
+#undef T
+
+#ifdef __cforall
+#define __queue_t(T) __queue(T)
+#else
+#define __queue_t(T) struct __queue
+#endif
+
+#ifdef __cforall
+
+	forall(dtype T | is_node(T))
+	static inline void ?{}( __queue(T) & this ) with( this ) {
+		head{ NULL };
+		tail{ &head };
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline void append( __queue(T) & this, T * val ) with( this ) {
+		verify(tail != NULL);
+		*tail = val;
+		tail = &get_next( *val );
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline T * pop_head( __queue(T) & this ) { // unlinks and returns the first node; returns the null head when the queue is empty
+		T * head = this.head;
+		if( head ) {
+			this.head = get_next( *head ); // advance to the second node (NULL if there is none)
+			if( !get_next( *head ) ) {
+				this.tail = &this.head; // popped the last node: tail goes back to pointing at head
+			}
+			get_next( *head ) = NULL; // clear the popped node's link before handing it back
+		}
+		return head;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline T * remove( __queue(T) & this, T ** it ) with( this ) {
+		T * val = *it;
+		verify( val );
+
+		(*it) = get_next( *val );
+
+		if( tail == &get_next( *val ) ) {
+			tail = it;
+		}
+
+		get_next( *val ) = NULL;
+
+		verify( (head == NULL) == (&head == tail) );
+		verify( *tail == NULL );
+		return val;
+	}
+
+	forall(dtype T | is_node(T))
+	static inline bool ?!=?( __queue(T) & this, zero_t zero ) {
+		return this.head != 0;
+	}
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Doubly Linked List
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+	forall(dtype TYPE | sized(TYPE))
+	#define T TYPE
+	#define __getter_t * [T * & next, T * & prev] ( T & )
+#else
+	typedef void (*__generit_c_getter_t)();
+	#define T void
+	#define __getter_t __generit_c_getter_t
+#endif
+struct __dllist {
+	T * head;
+	__getter_t __get;
+};
+#undef T
+#undef __getter_t
+
+#ifdef __cforall
+#define __dllist_t(T) __dllist(T)
+#else
+#define __dllist_t(T) struct __dllist
+#endif
+
+#ifdef __cforall
+
+	forall(dtype T | sized(T))
+	static inline [void] ?{}( __dllist(T) & this, * [T * & next, T * & prev] ( T & ) __get ) {
+		this.head{ NULL };
+		this.__get = __get;
+	}
+
+	#define next 0
+	#define prev 1
+	forall(dtype T | sized(T))
+	static inline void push_front( __dllist(T) & this, T & node ) with( this ) {
+		verify(__get);
+		if ( head ) {
+			__get( node ).next = head;
+			__get( node ).prev = __get( *head ).prev;
+			// inserted node must be consistent before it is seen
+			// prevent code movement across barrier
+			asm( "" : : : "memory" );
+			__get( *head ).prev = &node;
+			T & _prev = *__get( node ).prev;
+			__get( _prev ).next = &node;
+		}
+		else {
+			__get( node ).next = &node;
+			__get( node ).prev = &node;
+		}
+
+		// prevent code movement across barrier
+		asm( "" : : : "memory" );
+		head = &node;
+	}
+
+	forall(dtype T | sized(T))
+	static inline void remove( __dllist(T) & this, T & node ) with( this ) { // unlinks node from the circular list and clears its next/prev links
+		verify(__get);
+		if ( &node == head ) { // removing the head: choose the new head before unlinking
+			if ( __get( *head ).next == head ) { // head links to itself, i.e. node is the only element
+				head = NULL;
+			}
+			else {
+				head = __get( *head ).next;
+			}
+		}
+		__get( *__get( node ).next ).prev = __get( node ).prev; // successor now points back past node
+		__get( *__get( node ).prev ).next = __get( node ).next; // predecessor now points forward past node
+		__get( node ).next = NULL;
+		__get( node ).prev = NULL;
+	}
+
+	forall(dtype T | sized(T))
+	static inline bool ?!=?( __dllist(T) & this, zero_t zero ) {
+		return this.head != 0;
+	}
+	#undef next
+	#undef prev
+#endif
+
+//-----------------------------------------------------------------------------
+// Tools
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+
+#endif
Index: libcfa/src/bits/debug.h
===================================================================
--- libcfa/src/bits/debug.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,74 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// debug.h --
-//
-// Author           : Thierry Delisle
-// Created On       : Mon Nov 28 12:27:26 2016
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Feb  8 12:35:19 2018
-// Update Count     : 2
-//
-
-#pragma once
-
-#ifdef __CFA_DEBUG__
-	#define __cfaabi_dbg_debug_do(...) __VA_ARGS__
-	#define __cfaabi_dbg_no_debug_do(...)
-	#define __cfaabi_dbg_ctx __PRETTY_FUNCTION__
-	#define __cfaabi_dbg_ctx2 , __PRETTY_FUNCTION__
-	#define __cfaabi_dbg_ctx_param const char * caller
-	#define __cfaabi_dbg_ctx_param2 , const char * caller
-#else
-	#define __cfaabi_dbg_debug_do(...)
-	#define __cfaabi_dbg_no_debug_do(...) __VA_ARGS__
-	#define __cfaabi_dbg_ctx
-	#define __cfaabi_dbg_ctx2
-	#define __cfaabi_dbg_ctx_param
-	#define __cfaabi_dbg_ctx_param2
-#endif
-
-#ifdef __cforall
-extern "C" {
-#endif
-	#include <stdarg.h>
-	#include <stdio.h>
-
-      extern void __cfaabi_dbg_bits_write( const char *buffer, int len );
-      extern void __cfaabi_dbg_bits_acquire();
-      extern void __cfaabi_dbg_bits_release();
-      extern void __cfaabi_dbg_bits_print_safe  ( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));
-      extern void __cfaabi_dbg_bits_print_nolock( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));
-      extern void __cfaabi_dbg_bits_print_vararg( const char fmt[], va_list arg );
-      extern void __cfaabi_dbg_bits_print_buffer( char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 3, 4) ));
-#ifdef __cforall
-}
-#endif
-
-#ifdef __CFA_DEBUG_PRINT__
-	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_dbg_bits_write( buffer, len )
-	#define __cfaabi_dbg_acquire()                    __cfaabi_dbg_bits_acquire()
-	#define __cfaabi_dbg_release()                    __cfaabi_dbg_bits_release()
-	#define __cfaabi_dbg_print_safe(...)              __cfaabi_dbg_bits_print_safe   (__VA_ARGS__)
-	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_dbg_bits_print_nolock (__VA_ARGS__)
-	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_dbg_bits_print_buffer (__VA_ARGS__)
-	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len );
-	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len );
-#else
-	#define __cfaabi_dbg_write(...)               ((void)0)
-	#define __cfaabi_dbg_acquire()                ((void)0)
-	#define __cfaabi_dbg_release()                ((void)0)
-	#define __cfaabi_dbg_print_safe(...)          ((void)0)
-	#define __cfaabi_dbg_print_nolock(...)        ((void)0)
-	#define __cfaabi_dbg_print_buffer(...)        ((void)0)
-	#define __cfaabi_dbg_print_buffer_decl(...)   ((void)0)
-	#define __cfaabi_dbg_print_buffer_local(...)  ((void)0)
-#endif
-
-// Local Variables: //
-// mode: c //
-// tab-width: 4 //
-// End: //
Index: libcfa/src/bits/debug.hfa
===================================================================
--- libcfa/src/bits/debug.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/debug.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,74 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// debug.hfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Feb  8 12:35:19 2018
+// Update Count     : 2
+//
+
+#pragma once
+
+#ifdef __CFA_DEBUG__	// debug build: debug-only code is kept and the calling function's name is passed along
+	#define __cfaabi_dbg_debug_do(...) __VA_ARGS__	// emit the arguments
+	#define __cfaabi_dbg_no_debug_do(...)	// drop the arguments
+	#define __cfaabi_dbg_ctx __PRETTY_FUNCTION__	// caller name as the only argument
+	#define __cfaabi_dbg_ctx2 , __PRETTY_FUNCTION__	// caller name as a trailing argument (note the leading comma)
+	#define __cfaabi_dbg_ctx_param const char * caller	// matching parameter when it is the only one
+	#define __cfaabi_dbg_ctx_param2 , const char * caller	// matching parameter when it follows others
+#else	// non-debug build: the roles of debug_do/no_debug_do swap and the context macros expand to nothing
+	#define __cfaabi_dbg_debug_do(...)
+	#define __cfaabi_dbg_no_debug_do(...) __VA_ARGS__
+	#define __cfaabi_dbg_ctx
+	#define __cfaabi_dbg_ctx2
+	#define __cfaabi_dbg_ctx_param
+	#define __cfaabi_dbg_ctx_param2
+#endif
+
+#ifdef __cforall
+extern "C" {	// give the declarations below C linkage when compiled as Cforall
+#endif
+	#include <stdarg.h>
+	#include <stdio.h>
+
+      extern void __cfaabi_dbg_bits_write( const char *buffer, int len );	// NOTE(review): presumably writes len bytes of buffer to the debug output - confirm in the implementation
+      extern void __cfaabi_dbg_bits_acquire();	// NOTE(review): acquire/release presumably bracket exclusive access to the debug output - confirm
+      extern void __cfaabi_dbg_bits_release();
+      extern void __cfaabi_dbg_bits_print_safe  ( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));	// printf-style; arguments checked against fmt by the compiler
+      extern void __cfaabi_dbg_bits_print_nolock( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));	// printf-style; name suggests no locking - confirm
+      extern void __cfaabi_dbg_bits_print_vararg( const char fmt[], va_list arg );	// va_list variant
+      extern void __cfaabi_dbg_bits_print_buffer( char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 3, 4) ));	// printf-style with a caller-supplied buffer
+#ifdef __cforall
+}
+#endif
+
+#ifdef __CFA_DEBUG_PRINT__	// debug printing enabled: forward to the __cfaabi_dbg_bits_* routines
+	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_dbg_bits_write( buffer, len )
+	#define __cfaabi_dbg_acquire()                    __cfaabi_dbg_bits_acquire()
+	#define __cfaabi_dbg_release()                    __cfaabi_dbg_bits_release()
+	#define __cfaabi_dbg_print_safe(...)              __cfaabi_dbg_bits_print_safe   (__VA_ARGS__)
+	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_dbg_bits_print_nolock (__VA_ARGS__)
+	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_dbg_bits_print_buffer (__VA_ARGS__)
+	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len < 256 ? __dbg_len : 255 );	// snprintf returns the untruncated length, so write at most the 255 characters actually stored
+	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len < 256 ? __dbg_len : 255 );	// reuses __dbg_text/__dbg_len declared by an earlier ..._decl in the same scope
+#else	// debug printing disabled: every macro is a no-op expression
+	#define __cfaabi_dbg_write(...)               ((void)0)
+	#define __cfaabi_dbg_acquire()                ((void)0)
+	#define __cfaabi_dbg_release()                ((void)0)
+	#define __cfaabi_dbg_print_safe(...)          ((void)0)
+	#define __cfaabi_dbg_print_nolock(...)        ((void)0)
+	#define __cfaabi_dbg_print_buffer(...)        ((void)0)
+	#define __cfaabi_dbg_print_buffer_decl(...)   ((void)0)
+	#define __cfaabi_dbg_print_buffer_local(...)  ((void)0)
+#endif
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: libcfa/src/bits/defs.h
===================================================================
--- libcfa/src/bits/defs.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,42 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// defs.h --
-//
-// Author           : Thierry Delisle
-// Created On       : Thu Nov  9 13:24:10 2017
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Feb  8 16:22:41 2018
-// Update Count     : 8
-//
-
-#pragma once
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#define likely(x)   __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
-#define thread_local _Thread_local
-
-typedef void (*fptr_t)();
-typedef int_fast16_t __lock_size_t;
-
-#ifdef __cforall
-#define __cfa_anonymous_object(x) inline struct x
-#else
-#define __cfa_anonymous_object(x) x __cfa_anonymous_object
-#endif
-
-#ifdef __cforall
-void abort ( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
-extern "C" {
-#endif
-void __cabi_abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
-#ifdef __cforall
-}
-#endif
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/defs.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,42 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// defs.hfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Nov  9 13:24:10 2017
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Feb  8 16:22:41 2018
+// Update Count     : 8
+//
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define likely(x)   __builtin_expect(!!(x), 1)	// hint that the condition is expected to be true; !! normalises it to 0/1
+#define unlikely(x) __builtin_expect(!!(x), 0)	// hint that the condition is expected to be false
+#define thread_local _Thread_local	// shorter spelling of the C11 storage-class keyword
+
+typedef void (*fptr_t)();	// pointer to a function returning void, parameter list left unspecified
+typedef int_fast16_t __lock_size_t;	// NOTE(review): presumably a count/index of locks - confirm at the use sites
+
+#ifdef __cforall
+#define __cfa_anonymous_object(x) inline struct x	// Cforall: "inline struct x" (presumably embeds x's fields without a member name - confirm)
+#else
+#define __cfa_anonymous_object(x) x __cfa_anonymous_object	// plain C: an ordinary member of type x named __cfa_anonymous_object
+#endif
+
+#ifdef __cforall
+void abort ( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));	// Cforall-only overload: printf-style message, declared noreturn
+extern "C" {
+#endif
+void __cabi_abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));	// same signature, C linkage in both C and Cforall translation units
+#ifdef __cforall
+}
+#endif
Index: libcfa/src/bits/locks.h
===================================================================
--- libcfa/src/bits/locks.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,165 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// bits/locks.h -- Fast internal locks.
-//
-// Author           : Thierry Delisle
-// Created On       : Tue Oct 31 15:14:38 2017
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Mar 30 18:18:13 2018
-// Update Count     : 9
-//
-
-#pragma once
-
-#include "bits/debug.h"
-#include "bits/defs.h"
-#include <assert.h>
-
-#ifdef __cforall
-	extern "C" {
-		#include <pthread.h>
-	}
-#endif
-
-// pause to prevent excess processor bus usage
-#if defined( __sparc )
-	#define Pause() __asm__ __volatile__ ( "rd %ccr,%g0" )
-#elif defined( __i386 ) || defined( __x86_64 )
-	#define Pause() __asm__ __volatile__ ( "pause" : : : )
-#elif defined( __ARM_ARCH )
-	#define Pause() __asm__ __volatile__ ( "nop" : : : )
-#else
-	#error unsupported architecture
-#endif
-
-#if defined( __i386 ) || defined( __x86_64 ) || defined( __ARM_ARCH )
-	// Intel recommendation
-	#define __ALIGN__ __attribute__(( aligned (128) ))
-#elif defined( __sparc )
-	#define __ALIGN__ CALIGN
-#else
-	#error unsupported architecture
-#endif
-
-struct __spinlock_t {
-	// Wrap in struct to prevent false sharing with debug info
-	struct {
-		// Align lock on 128-bit boundary
-		__ALIGN__ volatile _Bool lock;
-	};
-	#ifdef __CFA_DEBUG__
-		// previous function to acquire the lock
-		const char * prev_name;
-		// previous thread to acquire the lock
-		void* prev_thrd;
-	#endif
-} __ALIGN__;
-
-#ifdef __cforall
-	extern "C" {
-		extern void disable_interrupts();
-		extern void enable_interrupts_noPoll();
-	}
-
-	extern void yield( unsigned int );
-
-	static inline void ?{}( __spinlock_t & this ) {
-		this.lock = 0;
-	}
-
-
-	#ifdef __CFA_DEBUG__
-		void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name);
-	#else
-		#define __cfaabi_dbg_record(x, y)
-	#endif
-
-	// Lock the spinlock, return false if already acquired
-	static inline _Bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
-		_Bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0);
-		if( result ) {
-			disable_interrupts();
-			__cfaabi_dbg_record( this, caller );
-		}
-		return result;
-	}
-
-	// Lock the spinlock, spin if already acquired
-	static inline void lock( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
-		#ifndef NOEXPBACK
-			enum { SPIN_START = 4, SPIN_END = 64 * 1024, };
-			unsigned int spin = SPIN_START;
-		#endif
-
-		for ( unsigned int i = 1;; i += 1 ) {
-			if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
-			#ifndef NOEXPBACK
-				// exponential spin
-				for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause();
-
-				// slowly increase by powers of 2
-				if ( i % 64 == 0 ) spin += spin;
-
-				// prevent overflow
-				if ( spin > SPIN_END ) spin = SPIN_START;
-			#else
-				Pause();
-			#endif
-		}
-		disable_interrupts();
-		__cfaabi_dbg_record( this, caller );
-	}
-
-	static inline void unlock( __spinlock_t & this ) {
-		enable_interrupts_noPoll();
-		__atomic_clear( &this.lock, __ATOMIC_RELEASE );
-	}
-
-
-	#ifdef __CFA_WITH_VERIFY__
-		extern bool __cfaabi_dbg_in_kernel();
-	#endif
-
-	struct __bin_sem_t {
-		bool     		signaled;
-		pthread_mutex_t 	lock;
-		pthread_cond_t  	cond;
-	};
-
-	static inline void ?{}(__bin_sem_t & this) with( this ) {
-		signaled = false;
-		pthread_mutex_init(&lock, NULL);
-		pthread_cond_init (&cond, NULL);
-	}
-
-	static inline void ^?{}(__bin_sem_t & this) with( this ) {
-		pthread_mutex_destroy(&lock);
-		pthread_cond_destroy (&cond);
-	}
-
-	static inline void wait(__bin_sem_t & this) with( this ) {
-		verify(__cfaabi_dbg_in_kernel());
-		pthread_mutex_lock(&lock);
-			if(!signaled) {   // this must be a loop, not if!
-				pthread_cond_wait(&cond, &lock);
-			}
-			signaled = false;
-		pthread_mutex_unlock(&lock);
-	}
-
-	static inline void post(__bin_sem_t & this) with( this ) {
-		verify(__cfaabi_dbg_in_kernel());
-
-		pthread_mutex_lock(&lock);
-			bool needs_signal = !signaled;
-			signaled = true;
-		pthread_mutex_unlock(&lock);
-
-		if (needs_signal)
-			pthread_cond_signal(&cond);
-	}
-#endif
Index: libcfa/src/bits/locks.hfa
===================================================================
--- libcfa/src/bits/locks.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/locks.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,165 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/locks.hfa -- Fast internal locks.
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 15:14:38 2017
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Fri Mar 30 18:18:13 2018
+// Update Count     : 9
+//
+
+#pragma once
+
+#include "bits/debug.hfa"
+#include "bits/defs.hfa"
+#include <assert.h>
+
+#ifdef __cforall
+	extern "C" {
+		#include <pthread.h>
+	}
+#endif
+
+// Pause(): one architecture-specific instruction issued in spin loops to prevent excess processor bus usage
+#if defined( __sparc )
+	#define Pause() __asm__ __volatile__ ( "rd %ccr,%g0" )
+#elif defined( __i386 ) || defined( __x86_64 )
+	#define Pause() __asm__ __volatile__ ( "pause" : : : )
+#elif defined( __ARM_ARCH )
+	#define Pause() __asm__ __volatile__ ( "nop" : : : )
+#else
+	#error unsupported architecture
+#endif
+
+#if defined( __i386 ) || defined( __x86_64 ) || defined( __ARM_ARCH )
+	// Intel recommendation: 128-byte alignment (applied to ARM as well here)
+	#define __ALIGN__ __attribute__(( aligned (128) ))
+#elif defined( __sparc )
+	#define __ALIGN__ CALIGN	// NOTE(review): CALIGN is not defined in this header - confirm it is provided on sparc builds
+#else
+	#error unsupported architecture
+#endif
+
+struct __spinlock_t {	// lock flag plus, in debug builds, a record of the previous acquirer
+	// Wrap in struct to prevent false sharing with debug info
+	struct {
+		// Align lock on the __ALIGN__ boundary (128 bytes, not bits, on x86/ARM)
+		__ALIGN__ volatile _Bool lock;
+	};
+	#ifdef __CFA_DEBUG__
+		// previous function to acquire the lock
+		const char * prev_name;
+		// previous thread to acquire the lock
+		void* prev_thrd;
+	#endif
+} __ALIGN__;
+
+#ifdef __cforall
+	extern "C" {
+		extern void disable_interrupts();
+		extern void enable_interrupts_noPoll();
+	}
+
+	extern void yield( unsigned int );
+
+	static inline void ?{}( __spinlock_t & this ) {	// constructor: the lock starts out released
+		this.lock = 0;	// only the lock flag is set; the debug-only fields are not touched here
+	}
+
+
+	#ifdef __CFA_DEBUG__
+		void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name);
+	#else
+		#define __cfaabi_dbg_record(x, y)
+	#endif
+
+	// Try once, without spinning, to lock the spinlock: returns true if acquired, false if it was already held
+	static inline _Bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
+		_Bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0);	// plain read first, atomic test-and-set only if the lock looks free
+		if( result ) {
+			disable_interrupts();	// called only once the lock is held; unlock() makes the matching enable call
+			__cfaabi_dbg_record( this, caller );	// expands to nothing unless __CFA_DEBUG__ is defined
+		}
+		return result;
+	}
+
+	// Lock the spinlock, spinning (with a growing backoff unless NOEXPBACK is defined) while it is already acquired
+	static inline void lock( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
+		#ifndef NOEXPBACK
+			enum { SPIN_START = 4, SPIN_END = 64 * 1024, };
+			unsigned int spin = SPIN_START;
+		#endif
+
+		for ( unsigned int i = 1;; i += 1 ) {
+			if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
+			#ifndef NOEXPBACK
+				// back off: issue `spin` pauses before the next attempt
+				for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause();
+
+				// double the backoff on every 64th failed attempt
+				if ( i % 64 == 0 ) spin += spin;
+
+				// past SPIN_END restart from SPIN_START, which also keeps spin from overflowing
+				if ( spin > SPIN_END ) spin = SPIN_START;
+			#else
+				Pause();
+			#endif
+		}
+		disable_interrupts();	// reached only after the lock has been acquired
+		__cfaabi_dbg_record( this, caller );	// expands to nothing unless __CFA_DEBUG__ is defined
+	}
+
+	static inline void unlock( __spinlock_t & this ) {	// release a lock taken with lock() or a successful try_lock()
+		enable_interrupts_noPoll();	// NOTE(review): called before the lock flag is cleared - confirm this order is intended
+		__atomic_clear( &this.lock, __ATOMIC_RELEASE );
+	}
+
+
+	#ifdef __CFA_WITH_VERIFY__
+		extern bool __cfaabi_dbg_in_kernel();
+	#endif
+
+	struct __bin_sem_t {	// binary semaphore built from a pthread mutex and condition variable
+		bool     		signaled;	// true once post() has run and no wait() has consumed it yet
+		pthread_mutex_t 	lock;	// held while signaled is read or written
+		pthread_cond_t  	cond;	// wait() blocks on this, post() signals it
+	};
+
+	static inline void ?{}(__bin_sem_t & this) with( this ) {	// constructor: unsignaled semaphore with default pthread attributes
+		signaled = false;
+		pthread_mutex_init(&lock, NULL);	// return codes of both init calls are not checked
+		pthread_cond_init (&cond, NULL);
+	}
+
+	static inline void ^?{}(__bin_sem_t & this) with( this ) {	// destructor: release the pthread objects
+		pthread_mutex_destroy(&lock);	// return codes of both destroy calls are not checked
+		pthread_cond_destroy (&cond);
+	}
+
+	static inline void wait(__bin_sem_t & this) with( this ) {	// block until the semaphore is posted, then consume the post
+		verify(__cfaabi_dbg_in_kernel());
+		pthread_mutex_lock(&lock);
+			while(!signaled) {   // loop, not if: pthread_cond_wait may wake spuriously, so re-check the predicate
+				pthread_cond_wait(&cond, &lock);
+			}
+			signaled = false;
+		pthread_mutex_unlock(&lock);
+	}
+
+	static inline void post(__bin_sem_t & this) with( this ) {	// mark the semaphore signaled and wake a waiter if needed
+		verify(__cfaabi_dbg_in_kernel());
+
+		pthread_mutex_lock(&lock);
+			bool needs_signal = !signaled;	// only a false -> true transition needs a wake-up
+			signaled = true;
+		pthread_mutex_unlock(&lock);
+
+		if (needs_signal)
+			pthread_cond_signal(&cond);	// issued after the mutex has been released
+	}
+#endif
Index: libcfa/src/bits/signal.h
===================================================================
--- libcfa/src/bits/signal.h	(revision ba9baadeb7d347b3375a3c22e4ce3628359e9187)
+++ 	(revision )
@@ -1,65 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// bits/signal.h -- Helper functions and defines to use signals
-//
-// Author           : Thierry Delisle
-// Created On       : Thu Jan 25 16:06:29 2018
-// Last Modified By :
-// Last Modified On :
-// Update Count     :
-//
-
-#pragma once
-
-#include "bits/debug.h"
-#include "bits/defs.h"
-
-extern "C" {
-#include <errno.h>
-#define __USE_GNU
-#include <signal.h>
-#undef __USE_GNU
-#include <stdlib.h>
-#include <string.h>
-}
-
-// Short hands for signal context information
-#define __CFA_SIGCXT__ ucontext_t *
-#define __CFA_SIGPARMS__ __attribute__((unused)) int sig, __attribute__((unused)) siginfo_t *sfp, __attribute__((unused)) __CFA_SIGCXT__ cxt
-
-// Sigaction wrapper : register an signal handler
-static void __cfaabi_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags ) {
-	struct sigaction act;
-
-	act.sa_sigaction = (void (*)(int, siginfo_t *, void *))handler;
-	act.sa_flags = flags;
-
-	if ( sigaction( sig, &act, NULL ) == -1 ) {
-		__cfaabi_dbg_print_buffer_decl(
-			" __cfaabi_sigaction( sig:%d, handler:%p, flags:%d ), problem installing signal handler, error(%d) %s.\n",
-			sig, handler, flags, errno, strerror( errno )
-		);
-		_exit( EXIT_FAILURE );
-	}
-}
-
-// Sigaction wrapper : restore default handler
-static void __cfaabi_sigdefault( int sig ) {
-	struct sigaction act;
-
-	act.sa_handler = SIG_DFL;
-	act.sa_flags = 0;
-	sigemptyset( &act.sa_mask );
-
-	if ( sigaction( sig, &act, NULL ) == -1 ) {
-		__cfaabi_dbg_print_buffer_decl(
-			" __cfaabi_sigdefault( sig:%d ), problem reseting signal handler, error(%d) %s.\n",
-			sig, errno, strerror( errno )
-		);
-		_exit( EXIT_FAILURE );
-	}
-}
Index: libcfa/src/bits/signal.hfa
===================================================================
--- libcfa/src/bits/signal.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
+++ libcfa/src/bits/signal.hfa	(revision 73abe950d807eab747c9e14353b158fcca827308)
@@ -0,0 +1,65 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/signal.hfa -- Helper functions and defines to use signals
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Jan 25 16:06:29 2018
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+#include "bits/debug.hfa"
+#include "bits/defs.hfa"
+
+extern "C" {
+#include <errno.h>
+#define __USE_GNU	// defined only around the <signal.h> include, then removed again
+#include <signal.h>
+#undef __USE_GNU
+#include <stdlib.h>
+#include <string.h>
+}
+
+// Shorthands: the signal context pointer type, and the three-parameter handler list (sig, sfp, cxt), each marked unused
+#define __CFA_SIGCXT__ ucontext_t *
+#define __CFA_SIGPARMS__ __attribute__((unused)) int sig, __attribute__((unused)) siginfo_t *sfp, __attribute__((unused)) __CFA_SIGCXT__ cxt
+
+// Sigaction wrapper : register a signal handler for sig with the given sa_flags; exits the process if installation fails
+static void __cfaabi_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags ) {
+	struct sigaction act;
+	sigemptyset( &act.sa_mask );	// sigaction() reads sa_mask, so start from an empty set instead of stack garbage
+	act.sa_sigaction = (void (*)(int, siginfo_t *, void *))handler;	// NOTE(review): sa_sigaction is honoured only when flags contains SA_SIGINFO - confirm callers pass it
+	act.sa_flags = flags;
+
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		__cfaabi_dbg_print_buffer_decl(
+			" __cfaabi_sigaction( sig:%d, handler:%p, flags:%d ), problem installing signal handler, error(%d) %s.\n",
+			sig, handler, flags, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );	// the message above is only produced when __CFA_DEBUG_PRINT__ is defined
+	}
+}
+
+// Sigaction wrapper : restore the default (SIG_DFL) handler for sig; exits the process if that fails
+static void __cfaabi_sigdefault( int sig ) {
+	struct sigaction act;
+
+	act.sa_handler = SIG_DFL;
+	act.sa_flags = 0;
+	sigemptyset( &act.sa_mask );	// no additional signals blocked
+
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		__cfaabi_dbg_print_buffer_decl(
+			" __cfaabi_sigdefault( sig:%d ), problem reseting signal handler, error(%d) %s.\n",
+			sig, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );	// the message above is only produced when __CFA_DEBUG_PRINT__ is defined
+	}
+}
