Index: libcfa/src/bits/algorithms.h
===================================================================
--- libcfa/src/bits/algorithms.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/algorithms.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,191 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/algorithms.h -- Small-array sorting helpers (fixed compare-exchange sequences and insertion sort).
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Oct 30 13:37:34 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+//
+
+#pragma once
+
+#ifdef SAFE_SORT
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort2( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort3( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort4( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort5( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sort6( T * arr );
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } ) static inline void __libcfa_small_sortN( T * arr, size_t dim );
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort( T * arr, size_t dim ) { // sort the dim elements of arr in place, dispatching on dim
+	switch( dim ) {
+		case 1 : return; // a single element is already sorted
+		case 2 : __libcfa_small_sort2( arr ); return; // sizes 2-6 each use a fixed compare-exchange sequence
+		case 3 : __libcfa_small_sort3( arr ); return;
+		case 4 : __libcfa_small_sort4( arr ); return;
+		case 5 : __libcfa_small_sort5( arr ); return;
+		case 6 : __libcfa_small_sort6( arr ); return;
+		default: __libcfa_small_sortN( arr, dim ); return; // every other size (including 0) goes to the insertion sort
+	}
+}
+
+#define min(x, y) ((y) > (x) ? (x) : (y)) // smaller of x and y; each argument is evaluated more than once
+#define max(x, y) ((y) > (x) ? (y) : (x)) // larger of x and y; each argument is evaluated more than once
+#define SWAP(x,y) { T a = min(arr[(x)], arr[(y)]); T b = max(arr[(x)], arr[(y)]); arr[(x)] = a; arr[(y)] = b;} // compare-exchange: arr[x] receives the smaller value, arr[y] the larger
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort2( T * arr ) { // order arr[0..1] in place
+	SWAP(0, 1); // one compare-exchange: smaller value ends in arr[0]
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort3( T * arr ) { // order arr[0..2] in place with 3 fixed compare-exchanges
+	SWAP(1, 2); // each SWAP(x, y) leaves the smaller of the pair in arr[x]
+	SWAP(0, 2);
+	SWAP(0, 1);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort4( T * arr ) { // order arr[0..3] in place with 5 fixed compare-exchanges
+	SWAP(0, 1); // order each half
+	SWAP(2, 3);
+	SWAP(0, 2); // smallest of all four reaches arr[0]
+	SWAP(1, 3); // largest of all four reaches arr[3]
+	SWAP(1, 2); // order the two middle elements
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort5( T * arr ) { // order arr[0..4] in place with 9 fixed compare-exchanges
+	SWAP(0, 1); // each SWAP(x, y) leaves the smaller of the pair in arr[x]
+	SWAP(3, 4);
+	SWAP(2, 4);
+	SWAP(2, 3);
+	SWAP(0, 3);
+	SWAP(0, 2);
+	SWAP(1, 4);
+	SWAP(1, 3);
+	SWAP(1, 2);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sort6( T * arr ) { // order arr[0..5] in place with 12 fixed compare-exchanges
+	SWAP(1, 2); // each SWAP(x, y) leaves the smaller of the pair in arr[x]
+	SWAP(4, 5);
+	SWAP(0, 2);
+	SWAP(3, 5);
+	SWAP(0, 1);
+	SWAP(3, 4);
+	SWAP(1, 4);
+	SWAP(0, 3);
+	SWAP(2, 5);
+	SWAP(1, 3);
+	SWAP(2, 4);
+	SWAP(2, 3);
+}
+
+forall( otype T | {  int ?<?( T, T ); int ?>?( T, T ); } )
+static inline void __libcfa_small_sortN( T * arr, size_t dim ) { // in-place insertion sort of arr[0..dim-1]; no-op for dim < 2
+	size_t i, j; // same type as dim: avoids signed/unsigned comparison and int overflow
+	for (i = 1; i < dim; i++) {
+		T tmp = arr[i]; // element to insert into the already sorted prefix arr[0..i-1]
+		for (j = i; j >= 1 && tmp < arr[j-1]; j--) { // loop stops at j == 0, so the unsigned index never wraps
+			arr[j] = arr[j-1]; // shift larger elements one slot to the right
+		}
+		arr[j] = tmp;
+	}
+}
+
+#else
+
+static inline void __libcfa_small_sort2( void* * arr );
+static inline void __libcfa_small_sort3( void* * arr );
+static inline void __libcfa_small_sort4( void* * arr );
+static inline void __libcfa_small_sort5( void* * arr );
+static inline void __libcfa_small_sort6( void* * arr );
+static inline void __libcfa_small_sortN( void* * arr, size_t dim );
+
+forall( dtype T ) // pointer version: elements are T* and are handled as untyped void* by the helpers
+static inline void __libcfa_small_sort( T* * arr, size_t dim ) { // sort the dim pointers of arr in place, dispatching on dim
+	switch( dim ) {
+		case 1 : return; // a single element is already sorted
+		case 2 : __libcfa_small_sort2( (void **) arr ); return; // sizes 2-6 each use a fixed compare-exchange sequence
+		case 3 : __libcfa_small_sort3( (void **) arr ); return;
+		case 4 : __libcfa_small_sort4( (void **) arr ); return;
+		case 5 : __libcfa_small_sort5( (void **) arr ); return;
+		case 6 : __libcfa_small_sort6( (void **) arr ); return;
+		default: __libcfa_small_sortN( (void **) arr, dim ); return; // every other size (including 0) goes to the insertion sort
+	}
+}
+
+#define min(x, y) ((y) > (x) ? (x) : (y)) // smaller of x and y; each argument is evaluated more than once
+#define max(x, y) ((y) > (x) ? (y) : (x)) // larger of x and y; each argument is evaluated more than once
+#define SWAP(x,y) { void* a = min(arr[(x)], arr[(y)]); void* b = max(arr[(x)], arr[(y)]); arr[(x)] = a; arr[(y)] = b;} // compare-exchange on pointer values: arr[x] receives the smaller, arr[y] the larger
+
+static inline void __libcfa_small_sort2( void* * arr ) { // order the pointers arr[0..1] in place
+	SWAP(0, 1); // one compare-exchange: smaller pointer value ends in arr[0]
+}
+
+static inline void __libcfa_small_sort3( void* * arr ) { // order the pointers arr[0..2] in place with 3 fixed compare-exchanges
+	SWAP(1, 2); // each SWAP(x, y) leaves the smaller pointer of the pair in arr[x]
+	SWAP(0, 2);
+	SWAP(0, 1);
+}
+
+static inline void __libcfa_small_sort4( void* * arr ) { // order the pointers arr[0..3] in place with 5 fixed compare-exchanges
+	SWAP(0, 1); // order each half
+	SWAP(2, 3);
+	SWAP(0, 2); // smallest of all four reaches arr[0]
+	SWAP(1, 3); // largest of all four reaches arr[3]
+	SWAP(1, 2); // order the two middle elements
+}
+
+static inline void __libcfa_small_sort5( void* * arr ) { // order the pointers arr[0..4] in place with 9 fixed compare-exchanges
+	SWAP(0, 1); // each SWAP(x, y) leaves the smaller pointer of the pair in arr[x]
+	SWAP(3, 4);
+	SWAP(2, 4);
+	SWAP(2, 3);
+	SWAP(0, 3);
+	SWAP(0, 2);
+	SWAP(1, 4);
+	SWAP(1, 3);
+	SWAP(1, 2);
+}
+
+static inline void __libcfa_small_sort6( void* * arr ) { // order the pointers arr[0..5] in place with 12 fixed compare-exchanges
+	SWAP(1, 2); // each SWAP(x, y) leaves the smaller pointer of the pair in arr[x]
+	SWAP(4, 5);
+	SWAP(0, 2);
+	SWAP(3, 5);
+	SWAP(0, 1);
+	SWAP(3, 4);
+	SWAP(1, 4);
+	SWAP(0, 3);
+	SWAP(2, 5);
+	SWAP(1, 3);
+	SWAP(2, 4);
+	SWAP(2, 3);
+}
+
+static inline void __libcfa_small_sortN( void* * arr, size_t dim ) { // in-place insertion sort of the pointers arr[0..dim-1]; no-op for dim < 2
+	size_t i, j; // same type as dim: avoids signed/unsigned comparison and int overflow
+	for (i = 1; i < dim; i++) {
+		void* tmp = arr[i]; // pointer to insert into the already sorted prefix arr[0..i-1]
+		for (j = i; j >= 1 && tmp < arr[j-1]; j--) { // loop stops at j == 0, so the unsigned index never wraps
+			arr[j] = arr[j-1]; // shift larger pointers one slot to the right
+		}
+		arr[j] = tmp;
+	}
+}
+
+#endif
+
+#undef SWAP
+#undef min
+#undef max
Index: libcfa/src/bits/align.h
===================================================================
--- libcfa/src/bits/align.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/align.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,62 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// align.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Fri Jul 21 23:05:35 2017
+// Update Count     : 2
+//
+// This  library is free  software; you  can redistribute  it and/or  modify it
+// under the terms of the GNU Lesser General Public License as published by the
+// Free Software  Foundation; either  version 2.1 of  the License, or  (at your
+// option) any later version.
+//
+// This library is distributed in the  hope that it will be useful, but WITHOUT
+// ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+// for more details.
+//
+// You should  have received a  copy of the  GNU Lesser General  Public License
+// along  with this library.
+//
+
+#pragma once
+
+#include <assert.h>
+#include <stdbool.h>
+
+// Minimum size used to align memory boundaries for memory allocations.
+#define libAlign() (sizeof(double))
+
+// Check for power of 2
+static inline bool libPow2( unsigned long int value ) {
+    // value & (value - 1) clears the lowest set bit, so the result is zero only when at most one bit is set (note: 0 also passes)
+    return (value & (value - 1ul)) == 0ul;
+} // libPow2
+
+
+// Returns value aligned at the floor of align.
+static inline unsigned long int libFloor( unsigned long int value, unsigned long int align ) {
+    assert( libPow2( align ) );
+    // for a power-of-2 align, -align has every bit at or above align set; the mask clears the lower bits, rounding value down to a multiple of align
+    return value & -align;
+} // libFloor
+
+
+// Returns value aligned at the ceiling of align.
+
+static inline unsigned long int libCeiling( unsigned long int value, unsigned long int align ) {
+    assert( libPow2( align ) );
+    // "negate, round down, negate" is the same as round up
+    return -libFloor( -value, align );
+} // libCeiling
+
+// Local Variables: //
+// compile-command: "make install" //
+// End: //
Index: libcfa/src/bits/containers.h
===================================================================
--- libcfa/src/bits/containers.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/containers.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,282 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/containers.h -- Intrusive generic containers
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 16:38:50 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+
+#pragma once
+
+#include "bits/align.h"
+#include "bits/defs.h"
+
+//-----------------------------------------------------------------------------
+// Array
+//-----------------------------------------------------------------------------
+
+#ifdef __cforall
+	forall(dtype T)
+#else
+	#define T void
+#endif
+struct __small_array { // array described by a pointer and an element count
+	T *           data; // first element (returned by begin())
+	__lock_size_t size; // number of elements; end() is computed as data + size
+};
+#undef T
+
+#ifdef __cforall
+	#define __small_array_t(T) __small_array(T)
+#else
+	#define __small_array_t(T) struct __small_array
+#endif
+
+#ifdef __cforall
+	// forall(otype T | sized(T))
+	// static inline void ?{}(__small_array(T) & this) {}
+
+	forall(dtype T | sized(T))
+	static inline T& ?[?]( __small_array(T) & this, __lock_size_t idx) { // subscript operator: reference to element idx of this.data
+		return ((typeof(this.data))this.data)[idx]; // idx is not checked against this.size
+	}
+
+	forall(dtype T | sized(T))
+	static inline T& ?[?]( const __small_array(T) & this, __lock_size_t idx) { // subscript operator on a const array; still yields a non-const T&
+		return ((typeof(this.data))this.data)[idx]; // idx is not checked against this.size
+	}
+
+	forall(dtype T | sized(T))
+	static inline T* begin( const __small_array(T) & this ) { // pointer to the first element
+		return ((typeof(this.data))this.data);
+	}
+
+	forall(dtype T | sized(T))
+	static inline T* end( const __small_array(T) & this ) { // pointer one past the last element (data + size)
+		return ((typeof(this.data))this.data) + this.size;
+	}
+#endif
+
+//-----------------------------------------------------------------------------
+// Node Base
+//-----------------------------------------------------------------------------
+
+#ifdef __cforall
+	trait is_node(dtype T) { // requirement on element types used by __stack and __queue
+		T*& get_next( T& ); // returns a reference to the node's next-pointer, so containers can both read and assign it
+	};
+#endif
+
+//-----------------------------------------------------------------------------
+// Stack
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+	forall(dtype TYPE | is_node(TYPE))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __stack { // singly-linked stack; in Cforall the links are reached through get_next()
+	T * top; // most recently pushed node, NULL when the stack is empty
+};
+#undef T
+
+#ifdef __cforall
+#define __stack_t(T) __stack(T)
+#else
+#define __stack_t(T) struct __stack
+#endif
+
+#ifdef __cforall
+	forall(dtype T | is_node(T))
+	static inline void ?{}( __stack(T) & this ) { // default constructor: start with an empty stack
+		(this.top){ NULL };
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline void push( __stack(T) & this, T * val ) { // link val in as the new top of the stack
+		verify( !get_next( *val ) ); // val must not already be linked (its next-pointer must be null)
+		get_next( *val ) = this.top; // the old top becomes val's successor
+		this.top = val;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline T * pop( __stack(T) & this ) { // unlink and return the top node, or NULL when the stack is empty
+		T * top = this.top;
+		if( top ) {
+			this.top = get_next( *top ); // successor becomes the new top
+			get_next( *top ) = NULL; // returned node is left unlinked, so it can be pushed again
+		}
+		return top;
+	}
+#endif
+
+//-----------------------------------------------------------------------------
+// Queue
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+	forall(dtype TYPE | is_node(TYPE))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __queue { // singly-linked FIFO queue
+	T * head; // first node, NULL when the queue is empty
+	T ** tail; // address of the link filled by the next append: &head when empty, otherwise the last node's next-pointer
+};
+#undef T
+
+#ifdef __cforall
+#define __queue_t(T) __queue(T)
+#else
+#define __queue_t(T) struct __queue
+#endif
+
+#ifdef __cforall
+
+	forall(dtype T | is_node(T))
+	static inline void ?{}( __queue(T) & this ) with( this ) { // default constructor: start with an empty queue
+		head{ NULL };
+		tail{ &head }; // empty queue: the next append writes directly into head
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline void append( __queue(T) & this, T * val ) with( this ) { // add val at the end of the queue
+		verify(tail != NULL);
+		*tail = val; // fill the terminating link (head itself when the queue is empty)
+		tail = &get_next( *val ); // val's next-pointer is now the terminating link
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline T * pop_head( __queue(T) & this ) { // unlink and return the first node, or NULL when the queue is empty
+		T * head = this.head;
+		if( head ) {
+			this.head = get_next( *head );
+			if( !get_next( *head ) ) { // removed node was the last one: tail must point back at head
+				this.tail = &this.head;
+			}
+			get_next( *head ) = NULL; // returned node is left unlinked
+		}
+		return head;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	static inline T * remove( __queue(T) & this, T ** it ) with( this ) { // unlink and return the node that the link *it points at
+		T * val = *it;
+		verify( val ); // *it must reference a node
+
+		(*it) = get_next( *val ); // bypass val in the chain
+
+		if( tail == &get_next( *val ) ) { // val was the last node: the link it now becomes the terminating link
+			tail = it;
+		}
+
+		get_next( *val ) = NULL; // returned node is left unlinked
+
+		verify( (head == NULL) == (&head == tail) ); // queue is empty exactly when tail refers to head
+		verify( *tail == NULL ); // the terminating link must be null
+		return val;
+	}
+
+	forall(dtype T | is_node(T))
+	static inline bool ?!=?( __queue(T) & this, zero_t zero ) { // comparison with 0: true when the queue holds at least one node
+		return this.head != 0;
+	}
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Doubly Linked List
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+	forall(dtype TYPE | sized(TYPE))
+	#define T TYPE
+	#define __getter_t * [T * & next, T * & prev] ( T & )
+#else
+	typedef void (*__generit_c_getter_t)();
+	#define T void
+	#define __getter_t __generit_c_getter_t
+#endif
+struct __dllist { // doubly-linked list whose link fields are reached through a caller-supplied getter
+	T * head; // current front node, NULL when the list is empty
+	__getter_t __get; // in Cforall: function returning references to a node's next and prev pointers
+};
+#undef T
+#undef __getter_t
+
+#ifdef __cforall
+#define __dllist_t(T) __dllist(T)
+#else
+#define __dllist_t(T) struct __dllist
+#endif
+
+#ifdef __cforall
+
+	forall(dtype T | sized(T))
+	static inline [void] ?{}( __dllist(T) & this, * [T * & next, T * & prev] ( T & ) __get ) { // construct an empty list using __get to reach node links
+		this.head{ NULL };
+		this.__get = __get;
+	}
+
+	#define next 0
+	#define prev 1
+	forall(dtype T | sized(T)) // below, .next and .prev are the macros 0 and 1, i.e. indices into the tuple returned by __get
+	static inline void push_front( __dllist(T) & this, T & node ) with( this ) { // insert node before the current head and make it the new head
+		verify(__get);
+		if ( head ) {
+			__get( node ).next = head;
+			__get( node ).prev = __get( *head ).prev; // head's predecessor becomes node's predecessor
+			// inserted node must be consistent before it is seen
+			// prevent code movement across barrier
+			asm( "" : : : "memory" );
+			__get( *head ).prev = &node;
+			T & _prev = *__get( node ).prev;
+			__get( _prev ).next = &node; // close the ring: predecessor now points forward to node
+		}
+		else {
+			__get( node ).next = &node; // single node: both links refer to itself (circular list)
+			__get( node ).prev = &node;
+		}
+
+		// prevent code movement across barrier
+		asm( "" : : : "memory" );
+		head = &node;
+	}
+
+	forall(dtype T | sized(T)) // below, .next and .prev are the macros 0 and 1, i.e. indices into the tuple returned by __get
+	static inline void remove( __dllist(T) & this, T & node ) with( this ) { // unlink node from the list and null its links
+		verify(__get);
+		if ( &node == head ) { // removing the head: pick a new head first
+			if ( __get( *head ).next == head ) { // head links to itself: it is the only node
+				head = NULL;
+			}
+			else {
+				head = __get( *head ).next;
+			}
+		}
+		__get( *__get( node ).next ).prev = __get( node ).prev; // successor skips back over node
+		__get( *__get( node ).prev ).next = __get( node ).next; // predecessor skips forward over node
+		__get( node ).next = NULL;
+		__get( node ).prev = NULL;
+	}
+
+	forall(dtype T | sized(T))
+	static inline bool ?!=?( __dllist(T) & this, zero_t zero ) { // comparison with 0: true when the list holds at least one node
+		return this.head != 0;
+	}
+	#undef next
+	#undef prev
+#endif
+
+//-----------------------------------------------------------------------------
+// Tools
+//-----------------------------------------------------------------------------
+#ifdef __cforall
+
+#endif
Index: libcfa/src/bits/debug.c
===================================================================
--- libcfa/src/bits/debug.c	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/debug.c	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,88 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// debug.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Mar 30 12:30:01 2017
+// Last Modified By :
+// Last Modified On :
+// Update Count     : 1
+//
+
+extern "C" {
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <unistd.h>
+}
+
+enum { buffer_size = 512 }; // capacity in bytes of the formatting buffer below
+static char buffer[ buffer_size ]; // single file-scope buffer shared by print_safe, print_nolock and print_vararg
+
+extern "C" {
+
+	void __cfaabi_dbg_bits_write( const char *in_buffer, int len ) {
+		// ensure all data is written; count is the total number of bytes written so far
+		for ( int count = 0, retcode; count < len; count += retcode ) {
+			// resume from the first unwritten byte (in_buffer + count) and retry while interrupted
+
+			for ( ;; ) {
+				retcode = write( STDERR_FILENO, in_buffer + count, len - count );
+
+				// not a timer interrupt ?
+				if ( retcode != -1 || errno != EINTR ) break;
+			}
+
+			if ( retcode == -1 ) _exit( EXIT_FAILURE ); // any other write error is fatal
+		}
+	}
+
+	void __cfaabi_dbg_bits_acquire() __attribute__((__weak__)) {} // weak default: does nothing; a non-weak definition elsewhere replaces it
+	void __cfaabi_dbg_bits_release() __attribute__((__weak__)) {} // weak default: does nothing; a non-weak definition elsewhere replaces it
+
+	void __cfaabi_dbg_bits_print_safe  ( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) )) {
+		va_list args;
+		va_start( args, fmt );
+		__cfaabi_dbg_bits_acquire();
+
+		// format into the shared buffer while holding the debug lock; vsnprintf returns the untruncated length
+		int len = vsnprintf( buffer, buffer_size, fmt, args );
+		if ( len >= buffer_size ) len = buffer_size - 1; // output was truncated: write only what the buffer holds
+		__cfaabi_dbg_bits_write( buffer, len );
+
+		__cfaabi_dbg_bits_release();
+		va_end( args );
+	}
+
+	void __cfaabi_dbg_bits_print_nolock( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) )) {
+		va_list args;
+		va_start( args, fmt );
+
+		// same as print_safe but without acquire/release; vsnprintf returns the untruncated length
+		int len = vsnprintf( buffer, buffer_size, fmt, args );
+		if ( len >= buffer_size ) len = buffer_size - 1; // output was truncated: write only what the buffer holds
+		__cfaabi_dbg_bits_write( buffer, len );
+		va_end( args );
+	}
+
+	void __cfaabi_dbg_bits_print_vararg( const char fmt[], va_list args ) { // va_list variant: formats into the shared buffer, no locking
+		int len = vsnprintf( buffer, buffer_size, fmt, args ); // returns the untruncated length, which may exceed buffer_size
+		__cfaabi_dbg_bits_write( buffer, len < buffer_size ? len : buffer_size - 1 ); // never write past what the buffer holds
+	}
+
+	void __cfaabi_dbg_bits_print_buffer( char in_buffer[], int in_buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 3, 4) )) {
+		va_list args;
+		va_start( args, fmt );
+
+		// format into the caller-supplied buffer; vsnprintf returns the untruncated length
+		int len = vsnprintf( in_buffer, in_buffer_size, fmt, args );
+		if ( len >= in_buffer_size ) len = in_buffer_size - 1; // output was truncated: write only what the buffer holds
+		__cfaabi_dbg_bits_write( in_buffer, len );
+		va_end( args );
+	}
+}
Index: libcfa/src/bits/debug.h
===================================================================
--- libcfa/src/bits/debug.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/debug.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,74 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// debug.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Nov 28 12:27:26 2016
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Feb  8 12:35:19 2018
+// Update Count     : 2
+//
+
+#pragma once
+
+#ifdef __CFA_DEBUG__
+	#define __cfaabi_dbg_debug_do(...) __VA_ARGS__
+	#define __cfaabi_dbg_no_debug_do(...)
+	#define __cfaabi_dbg_ctx __PRETTY_FUNCTION__
+	#define __cfaabi_dbg_ctx2 , __PRETTY_FUNCTION__
+	#define __cfaabi_dbg_ctx_param const char * caller
+	#define __cfaabi_dbg_ctx_param2 , const char * caller
+#else
+	#define __cfaabi_dbg_debug_do(...)
+	#define __cfaabi_dbg_no_debug_do(...) __VA_ARGS__
+	#define __cfaabi_dbg_ctx
+	#define __cfaabi_dbg_ctx2
+	#define __cfaabi_dbg_ctx_param
+	#define __cfaabi_dbg_ctx_param2
+#endif
+
+#ifdef __cforall
+extern "C" {
+#endif
+	#include <stdarg.h>
+	#include <stdio.h>
+
+      extern void __cfaabi_dbg_bits_write( const char *buffer, int len );
+      extern void __cfaabi_dbg_bits_acquire();
+      extern void __cfaabi_dbg_bits_release();
+      extern void __cfaabi_dbg_bits_print_safe  ( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));
+      extern void __cfaabi_dbg_bits_print_nolock( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));
+      extern void __cfaabi_dbg_bits_print_vararg( const char fmt[], va_list arg );
+      extern void __cfaabi_dbg_bits_print_buffer( char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 3, 4) ));
+#ifdef __cforall
+}
+#endif
+
+#ifdef __CFA_DEBUG_PRINT__
+	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_dbg_bits_write( buffer, len )
+	#define __cfaabi_dbg_acquire()                    __cfaabi_dbg_bits_acquire()
+	#define __cfaabi_dbg_release()                    __cfaabi_dbg_bits_release()
+	#define __cfaabi_dbg_print_safe(...)              __cfaabi_dbg_bits_print_safe   (__VA_ARGS__)
+	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_dbg_bits_print_nolock (__VA_ARGS__)
+	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_dbg_bits_print_buffer (__VA_ARGS__)
+	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len < 256 ? __dbg_len : 255 ); // declares the buffer, then prints; length clamped because snprintf returns the untruncated size
+	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len < 256 ? __dbg_len : 255 ); // reuses the buffer declared by _decl; same clamp
+#else
+	#define __cfaabi_dbg_write(...)               ((void)0)
+	#define __cfaabi_dbg_acquire()                ((void)0)
+	#define __cfaabi_dbg_release()                ((void)0)
+	#define __cfaabi_dbg_print_safe(...)          ((void)0)
+	#define __cfaabi_dbg_print_nolock(...)        ((void)0)
+	#define __cfaabi_dbg_print_buffer(...)        ((void)0)
+	#define __cfaabi_dbg_print_buffer_decl(...)   ((void)0)
+	#define __cfaabi_dbg_print_buffer_local(...)  ((void)0)
+#endif
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: libcfa/src/bits/defs.h
===================================================================
--- libcfa/src/bits/defs.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/defs.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,42 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// defs.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Nov  9 13:24:10 2017
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Feb  8 16:22:41 2018
+// Update Count     : 8
+//
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#define thread_local _Thread_local
+
+typedef void (*fptr_t)();
+typedef int_fast16_t __lock_size_t;
+
+#ifdef __cforall
+#define __cfa_anonymous_object(x) inline struct x
+#else
+#define __cfa_anonymous_object(x) x __cfa_anonymous_object
+#endif
+
+#ifdef __cforall
+void abort ( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
+extern "C" {
+#endif
+void __cabi_abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
+#ifdef __cforall
+}
+#endif
Index: libcfa/src/bits/locks.h
===================================================================
--- libcfa/src/bits/locks.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/locks.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,165 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/locks.h -- Fast internal locks.
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 15:14:38 2017
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Fri Mar 30 18:18:13 2018
+// Update Count     : 9
+//
+
+#pragma once
+
+#include "bits/debug.h"
+#include "bits/defs.h"
+#include <assert.h>
+
+#ifdef __cforall
+	extern "C" {
+		#include <pthread.h>
+	}
+#endif
+
+// pause to prevent excess processor bus usage
+#if defined( __sparc )
+	#define Pause() __asm__ __volatile__ ( "rd %ccr,%g0" )
+#elif defined( __i386 ) || defined( __x86_64 )
+	#define Pause() __asm__ __volatile__ ( "pause" : : : )
+#elif defined( __ARM_ARCH )
+	#define Pause() __asm__ __volatile__ ( "nop" : : : )
+#else
+	#error unsupported architecture
+#endif
+
+#if defined( __i386 ) || defined( __x86_64 ) || defined( __ARM_ARCH )
+	// Intel recommendation
+	#define __ALIGN__ __attribute__(( aligned (128) ))
+#elif defined( __sparc )
+	#define __ALIGN__ CALIGN
+#else
+	#error unsupported architecture
+#endif
+
+struct __spinlock_t {
+	// Wrap in struct to prevent false sharing with debug info
+	struct {
+		// Align lock with __ALIGN__ (aligned(128), i.e. 128 bytes, on x86 and ARM)
+		__ALIGN__ volatile _Bool lock;
+	};
+	#ifdef __CFA_DEBUG__
+		// previous function to acquire the lock
+		const char * prev_name;
+		// previous thread to acquire the lock
+		void* prev_thrd;
+	#endif
+} __ALIGN__;
+
+#ifdef __cforall
+	extern "C" {
+		extern void disable_interrupts();
+		extern void enable_interrupts_noPoll();
+	}
+
+	extern void yield( unsigned int );
+
+	static inline void ?{}( __spinlock_t & this ) { // default constructor: the lock starts released
+		this.lock = 0;
+	}
+
+
+	#ifdef __CFA_DEBUG__
+		void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name);
+	#else
+		#define __cfaabi_dbg_record(x, y)
+	#endif
+
+	// Lock the spinlock, return false if already acquired
+	static inline _Bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
+		_Bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0); // plain read first; the atomic test-and-set runs only if the lock looks free
+		if( result ) {
+			disable_interrupts(); // only on success; unlock() calls enable_interrupts_noPoll()
+			__cfaabi_dbg_record( this, caller ); // expands to nothing unless __CFA_DEBUG__ is defined
+		}
+		return result;
+	}
+
+	// Lock the spinlock, spin if already acquired
+	static inline void lock( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
+		#ifndef NOEXPBACK
+			enum { SPIN_START = 4, SPIN_END = 64 * 1024, }; // bounds on the number of Pause() calls between attempts
+			unsigned int spin = SPIN_START;
+		#endif
+
+		for ( unsigned int i = 1;; i += 1 ) { // i counts acquisition attempts
+			if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break; // plain read first, then atomic test-and-set
+			#ifndef NOEXPBACK
+				// exponential spin
+				for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause();
+
+				// double the spin count after every 64th attempt
+				if ( i % 64 == 0 ) spin += spin;
+
+				// once above SPIN_END, start over from SPIN_START
+				if ( spin > SPIN_END ) spin = SPIN_START;
+			#else
+				Pause();
+			#endif
+		}
+		disable_interrupts(); // reached only after the lock is held; unlock() calls enable_interrupts_noPoll()
+		__cfaabi_dbg_record( this, caller ); // expands to nothing unless __CFA_DEBUG__ is defined
+	}
+
+	static inline void unlock( __spinlock_t & this ) { // release a lock taken by lock() or a successful try_lock()
+		enable_interrupts_noPoll(); // counterpart of the disable_interrupts() call made on acquisition
+		__atomic_clear( &this.lock, __ATOMIC_RELEASE ); // clear the flag with release ordering
+	}
+
+
+	#ifdef __CFA_WITH_VERIFY__
+		extern bool __cfaabi_dbg_in_kernel();
+	#endif
+
+	struct __bin_sem_t { // binary semaphore built from a pthread mutex and condition variable
+		bool     		signaled; // true after post() until a wait() consumes it
+		pthread_mutex_t 	lock; // protects signaled
+		pthread_cond_t  	cond; // waited on by wait(), signalled by post()
+	};
+
+	static inline void ?{}(__bin_sem_t & this) with( this ) { // default constructor: not signaled, default-attribute mutex and condition
+		signaled = false;
+		pthread_mutex_init(&lock, NULL); // return value is not checked
+		pthread_cond_init (&cond, NULL); // return value is not checked
+	}
+
+	static inline void ^?{}(__bin_sem_t & this) with( this ) { // destructor: release the pthread objects created by the constructor
+		pthread_mutex_destroy(&lock);
+		pthread_cond_destroy (&cond);
+	}
+
+	static inline void wait(__bin_sem_t & this) with( this ) { // block until the semaphore has been posted, then consume the post
+		verify(__cfaabi_dbg_in_kernel());
+		pthread_mutex_lock(&lock);
+			while(!signaled) {   // loop, not if: pthread_cond_wait may return spuriously
+				pthread_cond_wait(&cond, &lock);
+			}
+			signaled = false; // consume the post so the next wait() blocks again
+		pthread_mutex_unlock(&lock);
+	}
+
+	static inline void post(__bin_sem_t & this) with( this ) { // mark the semaphore signaled and wake a waiter if it was not already signaled
+		verify(__cfaabi_dbg_in_kernel());
+
+		pthread_mutex_lock(&lock);
+			bool needs_signal = !signaled; // a post on an already signaled semaphore does not signal again
+			signaled = true;
+		pthread_mutex_unlock(&lock);
+
+		if (needs_signal) // condition is signalled after the mutex has been released
+			pthread_cond_signal(&cond);
+	}
+#endif
Index: libcfa/src/bits/signal.h
===================================================================
--- libcfa/src/bits/signal.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
+++ libcfa/src/bits/signal.h	(revision bf71cfdb7285490eee552b461158846f626cc52f)
@@ -0,0 +1,65 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/signal.h -- Helper functions and defines to use signals
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Jan 25 16:06:29 2018
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+#include "bits/debug.h"
+#include "bits/defs.h"
+
+extern "C" {
+#include <errno.h>
+#define __USE_GNU
+#include <signal.h>
+#undef __USE_GNU
+#include <stdlib.h>
+#include <string.h>
+}
+
+// Short hands for signal context information
+#define __CFA_SIGCXT__ ucontext_t *
+#define __CFA_SIGPARMS__ __attribute__((unused)) int sig, __attribute__((unused)) siginfo_t *sfp, __attribute__((unused)) __CFA_SIGCXT__ cxt
+
+// Sigaction wrapper : register a signal handler; exits the process if sigaction() fails
+static void __cfaabi_sigaction( int sig, void (*handler)(__CFA_SIGPARMS__), int flags ) {
+	struct sigaction act;
+
+	act.sa_sigaction = (void (*)(int, siginfo_t *, void *))handler;
+	act.sa_flags = flags;
+	sigemptyset( &act.sa_mask ); // sa_mask must be initialized: block no additional signals while the handler runs
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		__cfaabi_dbg_print_buffer_decl(
+			" __cfaabi_sigaction( sig:%d, handler:%p, flags:%d ), problem installing signal handler, error(%d) %s.\n",
+			sig, handler, flags, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );
+	}
+}
+
+// Sigaction wrapper : restore default handler; exits the process if sigaction() fails
+static void __cfaabi_sigdefault( int sig ) {
+	struct sigaction act;
+
+	act.sa_handler = SIG_DFL; // default disposition for sig
+	act.sa_flags = 0;
+	sigemptyset( &act.sa_mask ); // no additional signals blocked
+
+	if ( sigaction( sig, &act, NULL ) == -1 ) {
+		__cfaabi_dbg_print_buffer_decl( // prints only when __CFA_DEBUG_PRINT__ is defined; otherwise expands to ((void)0)
+			" __cfaabi_sigdefault( sig:%d ), problem reseting signal handler, error(%d) %s.\n",
+			sig, errno, strerror( errno )
+		);
+		_exit( EXIT_FAILURE );
+	}
+}
