Index: src/libcfa/bits/containers.h
===================================================================
--- src/libcfa/bits/containers.h	(revision 11094d9e8320e6ac1134dd033cf6a3b08036a935)
+++ src/libcfa/bits/containers.h	(revision 11094d9e8320e6ac1134dd033cf6a3b08036a935)
@@ -0,0 +1,132 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/containers.h -- Intrusive generic containers.h
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 16:38:50 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+
+#pragma once
+
+#include <stddef.h>
+
+#include "libhdr.h"
+
+//-----------------------------------------------------------------------------
+// Node Base
+//-----------------------------------------------------------------------------
+
+#ifdef __CFORALL__
+	trait is_node(dtype T) {
+		T*& get_next( T& );
+	};
+#endif
+
+//-----------------------------------------------------------------------------
+// Stack
+//-----------------------------------------------------------------------------
+#ifdef __CFORALL__
+	forall(dtype TYPE | is_node(TYPE))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __stack {
+	T * top;
+};
+
+#ifdef __CFORALL__
+#define __stack_t(T) __stack(T)
+#else
+#define __stack_t(T) struct __stack
+#endif
+
+#ifdef __CFORALL__
+	forall(dtype T | is_node(T))
+	void ?{}( __stack(T) & this ) {
+		this.top = NULL;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	void push( __stack(T) & this, T * val ) {
+		verify( !get_next( *val ) );
+		get_next( *val ) = this.top;
+		this.top = val;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	T * pop( __stack(T) & this ) {
+		T * top = this.top;
+		if( top ) {
+			this.top = get_next( *top );
+			get_next( *top ) = NULL;
+		}
+		return top;
+	}
+#endif
+
+//-----------------------------------------------------------------------------
+// Queue
+//-----------------------------------------------------------------------------
+#ifdef __CFORALL__
+	forall(dtype T | is_node(T))
+	#define T TYPE
+#else
+	#define T void
+#endif
+struct __queue {
+	T * head;
+	T ** tail;
+};
+
+#ifdef __CFORALL__
+	forall(dtype T | is_node(T))
+	void ?{}( __queue(T) & this ) {
+		this.head = NULL;
+		this.tail = &this.head;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	void append( __queue(T) & this, T * val ) {
+		verify(this.tail != NULL);
+		*this.tail = val;
+		this.tail = &get_next( *val );
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	T * pop_head( __queue(T) & this ) {
+		T * head = this.head;
+		if( head ) {
+			this.head = get_next( *head );
+			if( !get_next( *head ) ) {
+				this.tail = &this.head;
+			}
+			get_next( *head ) = NULL;
+		}
+		return head;
+	}
+
+	forall(dtype T | is_node(T) | sized(T))
+	T * remove( __queue(T) & this, T ** it ) {
+		T * val = *it;
+		verify( val );
+
+		(*it) = get_next( *val );
+
+		if( this.tail == &get_next( *val ) ) {
+			this.tail = it;
+		}
+
+		get_next( *val ) = NULL;
+
+		verify( (this.head == NULL) == (&this.head == this.tail) );
+		verify( *this.tail == NULL );
+		return val;
+	}
+#endif
Index: src/libcfa/bits/defs.h
===================================================================
--- src/libcfa/bits/defs.h	(revision 11094d9e8320e6ac1134dd033cf6a3b08036a935)
+++ src/libcfa/bits/defs.h	(revision 11094d9e8320e6ac1134dd033cf6a3b08036a935)
@@ -0,0 +1,23 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/defs.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Nov 09 13:24:10 2017
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define unlikely(x)    __builtin_expect(!!(x), 0)
+#define likely  (x)    __builtin_expect(!!(x), 1)
+#define thread_local _Thread_local
Index: src/libcfa/bits/locks.h
===================================================================
--- src/libcfa/bits/locks.h	(revision 11094d9e8320e6ac1134dd033cf6a3b08036a935)
+++ src/libcfa/bits/locks.h	(revision 11094d9e8320e6ac1134dd033cf6a3b08036a935)
@@ -0,0 +1,121 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// bits/locks.h -- Fast internal locks.
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Oct 31 15:14:38 2017
+// Last Modified By : --
+// Last Modified On : --
+// Update Count     : 0
+//
+
+#pragma once
+
+#include "bits/defs.h"
+
+#include "libhdr.h"
+
+// pause to prevent excess processor bus usage
+#if defined( __sparc )
+	#define Pause() __asm__ __volatile__ ( "rd %ccr,%g0" )
+#elif defined( __i386 ) || defined( __x86_64 )
+	#define Pause() __asm__ __volatile__ ( "pause" : : : )
+#else
+	#error unsupported architecture
+#endif
+
+#if defined( __i386 ) || defined( __x86_64 )
+	// Intel recommendation
+	#define __ALIGN__ __attribute__(( aligned (128) ))
+#elif defined( __sparc )
+	#define __ALIGN__ CALIGN
+#else
+	#error unsupported architecture
+#endif
+
+#if defined( __x86_64 )
+	#define __lock_test_and_test_and_set( lock ) (lock) == 0 && __sync_lock_test_and_set_8( &(lock), 1 ) == 0
+	#define __lock_release( lock ) __sync_lock_release_8( &(lock) );
+#elif defined( __i386 )
+	#define __lock_test_and_test_and_set( lock ) (lock) == 0 && __sync_lock_test_and_set_4( &(lock), 1 ) == 0
+	#define __lock_release( lock ) __sync_lock_release_4( &(lock) );
+#else
+	#error unsupported architecture
+#endif
+
+struct __spinlock_t {
+	__ALIGN__ volatile uintptr_t lock;
+	#ifdef __CFA_DEBUG__
+		const char * prev_name;
+		void* prev_thrd;
+	#endif
+} __ALIGN__;
+
+#ifdef __CFORALL__
+	extern void yield( unsigned int );
+	extern thread_local struct thread_desc *    volatile this_thread;
+
+	static inline void ?{}( __spinlock_t & this ) {
+		this.lock = 0;
+	}
+
+	// Lock the spinlock, return false if already acquired
+	static inline _Bool try_lock  ( __spinlock_t & this DEBUG_CTX_PARAM2 ) {
+		_Bool result = __lock_test_and_test_and_set( this.lock );
+		LIB_DEBUG_DO(
+			if( result ) {
+				this.prev_name = caller;
+				this.prev_thrd = this_thread;
+			}
+		)
+		return result;
+	}
+
+	// Lock the spinlock, spin if already acquired
+	static inline void lock( __spinlock_t & this DEBUG_CTX_PARAM2 ) {
+		#ifndef NOEXPBACK
+			enum { SPIN_START = 4, SPIN_END = 64 * 1024, };
+			unsigned int spin = SPIN_START;
+		#endif
+
+		for ( unsigned int i = 1;; i += 1 ) {
+			if ( __lock_test_and_test_and_set( this.lock ) ) break;
+			#ifndef NOEXPBACK
+				// exponential spin
+				for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause();
+
+				// slowly increase by powers of 2
+				if ( i % 64 == 0 ) spin += spin;
+
+				// prevent overflow
+				if ( spin > SPIN_END ) spin = SPIN_START;
+			#else
+				Pause();
+			#endif
+		}
+		LIB_DEBUG_DO(
+			this.prev_name = caller;
+			this.prev_thrd = this_thread;
+		)
+	}
+
+	// Lock the spinlock, spin if already acquired
+	static inline void lock_yield( __spinlock_t & this DEBUG_CTX_PARAM2 ) {
+		for ( unsigned int i = 1;; i += 1 ) {
+			if ( __lock_test_and_test_and_set( this.lock ) ) break;
+			yield( i );
+		}
+		LIB_DEBUG_DO(
+			this.prev_name = caller;
+			this.prev_thrd = this_thread;
+		)
+	}
+
+	static inline void unlock( __spinlock_t & this ) {
+		__lock_release( this.lock );
+	}
+#endif
