Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
+++ libcfa/src/Makefile.am	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
@@ -145,5 +145,6 @@
 	concurrency/stats.cfa \
 	concurrency/stats.hfa \
-	concurrency/stats.hfa
+	concurrency/stats.hfa \
+	concurrency/pthread.cfa
 
 else
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
+++ libcfa/src/bits/defs.hfa	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
@@ -21,4 +21,5 @@
 #include <stdint.h>
 #include <assert.h>
+#include <pthread.h>
 
 #define likely(x)   __builtin_expect(!!(x), 1)
@@ -45,4 +46,21 @@
 #endif
 void __cabi_abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
+int real_pthread_create(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
+int real_pthread_join(pthread_t _thread, void **retval);
+pthread_t real_pthread_self(void);
+int real_pthread_mutex_init(pthread_mutex_t *_mutex, const pthread_mutexattr_t *attr);
+int real_pthread_mutex_destroy(pthread_mutex_t *_mutex);
+int real_pthread_mutex_lock(pthread_mutex_t *_mutex);
+int real_pthread_mutex_unlock(pthread_mutex_t *_mutex);
+int real_pthread_mutex_trylock(pthread_mutex_t *_mutex);
+int real_pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr);
+int real_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *_mutex);
+int real_pthread_cond_signal(pthread_cond_t *cond);
+int real_pthread_cond_broadcast(pthread_cond_t *cond);
+int real_pthread_cond_destroy(pthread_cond_t *cond);
+int real_pthread_attr_init(pthread_attr_t *attr);
+int real_pthread_attr_destroy(pthread_attr_t *attr);
+int real_pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize );
+int real_pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize );
 #ifdef __cforall
 }
Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
@@ -172,9 +172,9 @@
 
 		pthread_attr_t attr;
-		if (int ret = pthread_attr_init(&attr); 0 != ret) {
+		if (int ret = real_pthread_attr_init(&attr); 0 != ret) {
 			abort | "failed to create master epoll thread attr: " | ret | strerror(ret);
 		}
 
-		if (int ret = pthread_create(&master_poller, &attr, master_epoll, 0p); 0 != ret) {
+		if (int ret = real_pthread_create(&master_poller, &attr, master_epoll, 0p); 0 != ret) {
 			abort | "failed to create master epoll thread: " | ret | strerror(ret);
 		}
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
@@ -218,5 +218,5 @@
 		( this.runner ){};
 		init( this, "Main Processor", *mainCluster, 0p );
-		kernel_thread = pthread_self();
+		kernel_thread = real_pthread_self();
 
 		runner{ &this };
@@ -769,5 +769,5 @@
 	pthread_attr_t attr;
 
-	check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+	check( real_pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
 
 	size_t stacksize = max( PTHREAD_STACK_MIN, DEFAULT_STACK_SIZE );
@@ -796,11 +796,11 @@
 	#endif
 
-	check( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
-	check( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
+	check( real_pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
+	check( real_pthread_create( pthread, &attr, start, arg ), "pthread_create" );
 	return stack;
 }
 
 void __destroy_pthread( pthread_t pthread, void * stack, void ** retval ) {
-	int err = pthread_join( pthread, retval );
+	int err = real_pthread_join( pthread, retval );
 	if( err != 0 ) abort("KERNEL ERROR: joining pthread %p caused error %s\n", (void*)pthread, strerror(err));
 
@@ -808,9 +808,9 @@
 		pthread_attr_t attr;
 
-		check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+		check( real_pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
 
 		size_t stacksize;
 		// default stack size, normally defined by shell limit
-		check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
+		check( real_pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
 		assert( stacksize >= PTHREAD_STACK_MIN );
 		stacksize += __page_size;
Index: libcfa/src/concurrency/pthread.cfa
===================================================================
--- libcfa/src/concurrency/pthread.cfa	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
+++ libcfa/src/concurrency/pthread.cfa	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
@@ -0,0 +1,886 @@
+
+#define __cforall_thread__
+#define _GNU_SOURCE
+
+#include "locks.hfa"
+#include <fstream.hfa>
+#include <pthread.h>
+#include <clib/cfathread.h>
+#include <invoke.h>
+#include <bits/stack.hfa>
+
+#ifdef PNOOUTPUT // PRINT( stmt ) expands to nothing when PNOOUTPUT is defined, otherwise to stmt itself
+ #define PRINT( stmt ) 
+#else
+ #define PRINT( stmt ) stmt 
+#endif // PNOOUTPUT
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull-compare"
+
+/* pthread key, pthread once*/
+static simple_owner_lock once_lock,key_lock,magic_mutex_check, concurrency_lock;
+//######################### Local Storage Helpers #########################
+
+#define PTHREADS_THR_MAX 256
+#define PTHREAD_KEYS_MAX 1024
+struct Pthread_values{                 // one thread's slot for one pthread key
+    inline Seqable;                    // embeds the Seqable links so the slot can be placed on a key's Sequence
+    void* value;                       // value stored by pthread_setspecific
+    bool in_use;                       // set while the slot is linked on its key's thread list
+};
+
+
+Pthread_values *& Back( Pthread_values * n ) {     // back link of n: the Seqable link, retyped to Pthread_values
+    return (Pthread_values *)Back( (Seqable *)n );
+}
+Pthread_values *& Next( Pthread_values * n ) {     // next link of n: the Colable link, retyped to Pthread_values
+    return (Pthread_values *)Next( (Colable *)n );
+}
+
+struct Pthread_keys{                   // global descriptor of one pthread key
+    bool in_use;                       // slot has been handed out by pthread_key_create
+    void (*destructor)( void * );      // optional per-value destructor; NULL when none was supplied
+    Sequence(Pthread_values) threads;  // per-thread slots currently registered for this key
+};  // Pthread keys
+
+static void ?{}(Pthread_keys& k){
+    // only the thread list is constructed here; in_use and destructor are not assigned
+    k.threads{};
+}
+
+// Backing store for all PTHREAD_KEYS_MAX key slots; init_pthread_storage() runs the Pthread_keys constructor on each entry.
+// NOTE(review): an earlier raw char-array variant was meant to avoid implicit constructor calls - confirm the typed array is intended.
+static Pthread_keys cfa_pthread_keys_storage[PTHREAD_KEYS_MAX] __attribute__((aligned (16)));
+
+void init_pthread_storage(){                        // construct every key slot of the backing array in place
+    for (int slot = 0; slot < PTHREAD_KEYS_MAX; slot += 1){
+        ?{}(cfa_pthread_keys_storage[slot]);
+    }
+}
+
+#define cfa_pthread_keys ((Pthread_keys *)cfa_pthread_keys_storage)
+
+/* Controlling the iterations of destructors for thread-specific data.  */
+#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS	4
+/* Number of iterations this implementation does.  */
+#define PTHREAD_DESTRUCTOR_ITERATIONS	_POSIX_THREAD_DESTRUCTOR_ITERATIONS
+
+//######################### Parallelism Helpers #########################
+
+struct Pthread_kernel_threads{         // stack node wrapping one processor
+    inline Colable;                    // embeds the Colable link so nodes can be pushed on a Stack
+    processor p;                       // the processor owned by this node
+};
+
+Pthread_kernel_threads *& Next( Pthread_kernel_threads * n ) {     // next link of n: the Colable link, retyped to the node type
+    return (Pthread_kernel_threads *)Next( (Colable *)n );
+}
+
+static Stack(Pthread_kernel_threads) cfa_pthreads_kernel_threads;	// nodes pushed and popped by pthread_setconcurrency
+static bool cfa_pthreads_kernel_threads_zero = false;	// true after pthread_setconcurrency( 0 ); pthread_getconcurrency then reports 0
+static int cfa_pthreads_no_kernel_threads = 1;	// concurrency level last reached by pthread_setconcurrency (starts at 1)
+
+
+//######################### Cond Helpers #########################
+
+typedef pthread_cond_var(simple_owner_lock) cfa2pthr_cond_var_t;
+
+/* condvar helper routines */
+static void init(pthread_cond_t* pcond){            // build a CFA condition variable inside the pthread_cond_t storage
+    cfa2pthr_cond_var_t* cfa_cond = (cfa2pthr_cond_var_t*)pcond;
+    (*cfa_cond){};
+}
+
+static cfa2pthr_cond_var_t* get(pthread_cond_t* pcond){    // view the pthread_cond_t storage as the CFA condition variable (plain cast)
+    return (cfa2pthr_cond_var_t*)pcond;
+}
+
+static void destroy(pthread_cond_t* cond){          // run the destructor of the CFA condition variable stored in *cond
+    ^?{}(*get(cond));
+}
+
+
+//######################### Mutex Helper #########################
+
+/* mutex helper routines */
+static void mutex_check(pthread_mutex_t* t){
+	// Use double check to improve performance. Check is safe on x86; volatile prevents compiler reordering
+	volatile pthread_mutex_t *const mutex_ = t;
+	// SKULLDUGGERY: not a portable way to access the __lock field, /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h
+	int _lock_val = ((pthread_mutex_t *)mutex_)->__data.__lock;
+	// A zero __lock word is taken to mean "never constructed"; pthread_mutex_init is then called once under magic_mutex_check.
+	// NOTE(review): relies on a constructed simple_owner_lock never leaving zero in that word - confirm against its layout.
+	if ( _lock_val == 0 ) {			// static initialized ?
+	    lock(magic_mutex_check);	// race
+	    _lock_val = ((pthread_mutex_t *)mutex_)->__data.__lock;
+	    if ( _lock_val == 0 ) {		// static initialized ?
+		pthread_mutex_init( t, NULL );
+	    } // if
+	    unlock(magic_mutex_check);	// race
+	} // if
+} // mutex_check
+
+
+static void init(pthread_mutex_t* plock){           // construct a simple_owner_lock inside the pthread_mutex_t storage
+    simple_owner_lock* _lock = (simple_owner_lock*)plock;
+    ?{}(*_lock);
+}
+
+static simple_owner_lock* get(pthread_mutex_t* plock){     // view the pthread_mutex_t storage as a simple_owner_lock (plain cast)
+    return (simple_owner_lock*)plock;
+}
+
+static void destroy(pthread_mutex_t* plock){        // run the destructor of the simple_owner_lock stored in *plock
+    ^?{}(*get(plock));
+}
+
+//######################### Attr helpers #########################
+struct cfaPthread_attr_t {								// thread attributes, overlaid on a caller's pthread_attr_t by get()
+		int contentionscope;							// pthread_attr_{set,get}scope
+		int detachstate;								// pthread_attr_{set,get}detachstate
+		size_t stacksize;								// pthread_attr_{set,get}stacksize / stack; read when a thread is created
+		void *stackaddr;								// pthread_attr_{set,get}stackaddr / stack
+		int policy;										// no accessor in the visible part of this file
+		int inheritsched;								// no accessor in the visible part of this file
+		struct sched_param param;						// no accessor in the visible part of this file
+} typedef cfaPthread_attr_t;
+
+static const cfaPthread_attr_t default_attrs{      // values copied by pthread_attr_init and used when pthread_create gets no attr
+    0,                 // contentionscope (presumably positional, in field order - confirm)
+    0,                 // detachstate
+    (size_t)65000,     // stacksize; NOTE(review): literal differs from DEFAULT_STACK_SIZE used in the disabled variant below
+    (void *)NULL,      // stackaddr
+    0,                 // policy
+    0,                 // inheritsched
+    {0}                // param
+};
+
+
+/*
+cfaPthread_attr_t default_attrs = {
+    PTHREAD_SCOPE_SYSTEM,
+    PTHREAD_CREATE_JOINABLE,
+    (size_t)DEFAULT_STACK_SIZE,
+    (void *)NULL,
+    0,
+    PTHREAD_EXPLICIT_SCHED,
+    {0}
+};
+*/
+
+
+static cfaPthread_attr_t* get(const pthread_attr_t* attr){ // view attr as the CFA attribute record; the cast also drops const
+    return (cfaPthread_attr_t*)attr;
+}
+
+
+//######################### Threads Helper #########################
+
+exception pthread_exit_exp {};             // exception type handed to cancel_stack() by pthread_exit
+static vtable(pthread_exit_exp) exp;       // vtable instance referenced when pthread_exit builds that exception
+
+thread cfaPthread{                                      // CFA thread type standing behind every pthread_t created here
+    cfaPthread_attr_t attr;                             // copy of the creation attributes (or default_attrs)
+    pthread_t pthreadId;                                // this object's own address, as produced by create()
+    void *joinval;										// pthreads return value
+	pthread_attr_t pthread_attr;						// NOTE(review): not referenced in the visible part of this file
+    void *(*start_routine)(void *);                     // routine start
+    void *arg;					                        // thread parameter
+    Pthread_values* pthreadData;                        // per-key value table, allocated on first pthread_setspecific
+    bool isTerminated;                                  // flag used for tryjoin
+};
+
+/* thread part routines */
+//  cfaPthread entry point
+void main(cfaPthread& _thread){                     // thread body: run the user routine and keep its result for the joiner
+    _thread.joinval = _thread.start_routine(_thread.arg);
+    _thread.isTerminated = true;                    // read by pthread_tryjoin_np
+}
+
+// generate pthread_t by cfaPthread ptr
+static pthread_t create( cfaPthread *p ) {          // the pthread_t value is simply the descriptor's address
+	    return (pthread_t)p;
+}
+
+static cfaPthread *lookup( pthread_t p ){           // inverse of create(): cast the id back to its descriptor, no validation
+    return (cfaPthread*)p;
+}
+
+void pthread_deletespecific_( Pthread_values* values )  { // run key destructors over a thread's value table, then free the table
+    Pthread_values* value;
+    Pthread_keys* key;
+    bool destcalled = true;                         // starts true so the first pass always runs
+    if (values != NULL){
+        for ( int attempts = 0; attempts < PTHREAD_DESTRUCTOR_ITERATIONS && destcalled ; attempts += 1 ) {
+            destcalled = false;
+            lock(key_lock);
+            for (int i = 0; i < PTHREAD_KEYS_MAX; i++){
+                // for each valid key
+                if ( values[i].in_use){
+                    value = &values[i];
+                    key = &cfa_pthread_keys[i];
+                    value->in_use = false;
+                    remove(key->threads, *value);
+                    // POSIX order: clear the slot first, then call the destructor with the previous value, so a value the destructor re-stores survives for the next pass
+                    void* old = value->value; void (*dtor)( void * ) = key->destructor; // snapshot both while key_lock is held
+                    value->value = NULL;
+                    if (old != NULL && dtor != NULL){
+                        unlock(key_lock);           // the destructor may call back into the key routines
+                        dtor(old); // run destructor
+                        lock(key_lock);
+                        destcalled = true;          // something ran, so another pass is needed
+                    }   // if
+                }   // if
+            }   // for
+            unlock(key_lock);
+        }   // for
+        free(values);
+    }   // if
+}
+
+void ^?{}(cfaPthread & mutex t){
+    // destructor: run key destructors over this thread's value table and free it (no-op when the table is NULL)
+    Pthread_values* values = t.pthreadData;
+    pthread_deletespecific_(values);
+    PRINT(sout | "thread exited" | t.pthreadId;)
+}
+
+static void ?{}(cfaPthread &t, pthread_t* _thread, const pthread_attr_t * _attr,void *(*start_routine)(void *), void * arg) {
+    // Build a cfaPthread that will run start_routine(arg); the new id is also written to *_thread.
+    // stack size comes from the caller's attributes when given, otherwise DEFAULT_STACK_SIZE
+    cfaPthread_attr_t * attr = get(_attr);
+    ((thread&)t){ attr ? attr->stacksize: DEFAULT_STACK_SIZE };
+    // initialize _thread & cfaPthread id
+    t.pthreadId = create(&t);
+    *_thread = t.pthreadId;
+    // if attr null, self attr will be set as default_attrs; else set to attr 
+    t.attr = (attr != NULL ? *attr : default_attrs);
+
+    // init start routine, arguments and bookkeeping; isTerminated is read by pthread_tryjoin_np, so it must start out false
+    t.start_routine = start_routine;
+    t.arg = arg;
+    t.pthreadData = NULL;
+    t.joinval = NULL;
+    t.isTerminated = false;
+}
+
+
+extern "C"{
+    //######################### Pthread Attrs #########################
+
+    int pthread_attr_init(pthread_attr_t *attr){    // construct the CFA attribute record in *attr and load default_attrs; always 0
+        cfaPthread_attr_t* _attr = get(attr);
+        ?{}(*_attr);
+        *_attr = default_attrs;
+        return 0;
+    }
+    int pthread_attr_destroy(pthread_attr_t *attr){ // run the destructor of the CFA attribute record in *attr; always 0
+        ^?{}(*get(attr));
+        return 0;
+    }
+    
+    int pthread_attr_setscope( pthread_attr_t *attr, int contentionscope ) {
+        get( attr )->contentionscope = contentionscope;     // stored without validation
+        return 0;
+    } // pthread_attr_setscope
+
+    int pthread_attr_getscope( const pthread_attr_t *attr, int *contentionscope ) {
+        *contentionscope = get( attr )->contentionscope;    // returns whatever was last stored
+        return 0;
+    } // pthread_attr_getscope
+
+    int pthread_attr_setdetachstate( pthread_attr_t *attr, int detachstate ) {
+        get( attr )->detachstate = detachstate;             // stored without validation
+        return 0;
+    } // pthread_attr_setdetachstate
+
+    int pthread_attr_getdetachstate( const pthread_attr_t *attr, int *detachstate ) {
+        *detachstate = get( attr )->detachstate;            // returns whatever was last stored
+        return 0;
+    } // pthread_attr_getdetachstate
+
+    int pthread_attr_setstacksize( pthread_attr_t *attr, size_t stacksize ) {
+        get( attr )->stacksize = stacksize;                 // stored without validation; read when a thread is created
+        return 0;
+    } // pthread_attr_setstacksize
+
+    int pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize ) {
+        *stacksize = get( attr )->stacksize;                // returns whatever was last stored
+        return 0;
+    } // pthread_attr_getstacksize
+
+    int pthread_attr_getguardsize( const pthread_attr_t * /* attr */, size_t * /* guardsize */ ) {
+	    return 0;   // no-op stub; NOTE(review): the guardsize output is never written, callers read whatever was there
+    } // pthread_attr_getguardsize
+
+    int pthread_attr_setguardsize( pthread_attr_t * /* attr */, size_t /* guardsize */ ) {
+	    return 0;   // no-op stub: the requested guard size is ignored and success is reported
+    } // pthread_attr_setguardsize
+
+    int pthread_attr_setstackaddr( pthread_attr_t *attr, void *stackaddr ) {
+        get( attr )->stackaddr = stackaddr;                 // stored without validation
+        return 0;
+    } // pthread_attr_setstackaddr
+
+    int pthread_attr_getstackaddr( const pthread_attr_t *attr, void **stackaddr ) {
+        *stackaddr = get( attr )->stackaddr;                // returns whatever was last stored
+        return 0;
+    } // pthread_attr_getstackaddr
+
+    int pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize ) {
+        get( attr )->stackaddr = stackaddr;                 // both fields stored without validation
+        get( attr )->stacksize = stacksize;
+	    return 0;
+    } // pthread_attr_setstack
+
+    int pthread_attr_getstack( const pthread_attr_t *attr, void **stackaddr, size_t *stacksize ) {
+        *stackaddr = get( attr )->stackaddr;                // returns whatever was last stored in both fields
+        *stacksize = get( attr )->stacksize;
+        return 0;
+    } // pthread_attr_getstack
+
+    // Initialize thread attribute *attr with attributes corresponding to the
+    // already running thread threadID. It shall be called on uninitialized attr
+    // and destroyed with pthread_attr_destroy when no longer needed. Returns ENOMEM for a NULL attr, else 0.
+    int pthread_getattr_np( pthread_t threadID, pthread_attr_t *attr ) __THROW { // GNU extension
+        // race condition during copy
+        cfaPthread_attr_t* _attr = get(attr);
+        if (_attr == NULL){                 // reject NULL before the constructor touches the storage
+            return ENOMEM;
+        }   // if
+        ?{}(*_attr);
+        *_attr = lookup( threadID )->attr; // copy all fields
+	    return 0;
+    } // pthread_getattr_np
+
+
+    //######################### Threads #########################
+
+    int pthread_create(pthread_t * _thread, const pthread_attr_t * attr, void *(*start_routine)(void *), void * arg){
+        cfaPthread *t = alloc();
+        if (t == NULL) return EAGAIN; // no resource: fail before constructing into the storage
+        // the constructor records start_routine/arg and writes the new id to *_thread
+        (*t){_thread, attr, start_routine, arg};
+        return 0;
+    }   //pthread_create
+
+
+    int pthread_join(pthread_t _thread, void **value_ptr){ // wait for _thread, hand back its result, then delete its descriptor
+        if (_thread == NULL) return EINVAL;   // if thread is invalid
+        if (_thread == pthread_self()) return EDEADLK; // a thread cannot join itself
+        cfaPthread* p = lookup(_thread);    // get user thr pointer
+        try {
+            join(*p);
+        } catchResume (ThreadCancelled(cfaPthread) * cancel) {} // if thread called pthread_exit
+        if (value_ptr != NULL ) *value_ptr = p->joinval;   // fetch result
+        delete(p);                          // the id must not be used again after this point
+        return 0;
+    }   //pthread_join_
+
+    int pthread_tryjoin_np(pthread_t _thread, void **value_ptr){ // like pthread_join, but EBUSY while the thread is still running
+        if (_thread == NULL) return EINVAL;  // if thread is invalid
+        if (_thread == pthread_self()) return EDEADLK;
+        cfaPthread* p = lookup(_thread);
+        if (!p->isTerminated) return EBUSY; // thread not finished ?
+        try { join( *p ); } catchResume (ThreadCancelled(cfaPthread) * cancel) {} // same handling as pthread_join for a thread that called pthread_exit
+        if (value_ptr != NULL ) *value_ptr = p->joinval;
+        delete(p);                          // the id must not be used again after this point
+        return 0;
+    }   //pthread_tryjoin_np
+
+    pthread_t pthread_self(void){   // caller's id: active_thread() moved back by sizeof(cfaPthread)-sizeof(thread$); NOTE(review): presumes the caller is a cfaPthread with that layout - confirm
+        return (pthread_t)((char*)active_thread()-(sizeof(cfaPthread)-sizeof(thread$)));
+    }   //pthread_self_
+
+    void pthread_exit(void * status){               // end the calling thread, leaving status for whoever joins it
+        cfaPthread* self = lookup(pthread_self());  // descriptor of the caller
+        // publish the result and the finished flag before leaving
+        self->joinval = status;
+        self->isTerminated = true;
+        cancel_stack((pthread_exit_exp){&exp});     // hand a pthread_exit_exp to cancel_stack
+    }   //pthread_exit
+
+
+
+    //######################### Mutex #########################
+
+    int pthread_mutex_init(pthread_mutex_t *_mutex, const pthread_mutexattr_t *attr){ // attr is accepted but not used
+        if (_mutex == NULL) return EINVAL;
+        // construct a simple_owner_lock inside the caller's storage
+        init(_mutex);
+        return 0;
+    }   //pthread_mutex_init_
+
+
+    int pthread_mutex_destroy(pthread_mutex_t *_mutex){
+        // Tear down the owner lock living in *_mutex.
+        // Returns EINVAL for a NULL mutex, EBUSY while some thread owns it, 0 otherwise.
+        if (_mutex == NULL) return EINVAL;
+
+        simple_owner_lock* held = get(_mutex);
+        if (held->owner != NULL) return EBUSY;
+
+        ^?{}(*held);
+        return 0;
+    }   //pthread_mutex_destroy_
+
+    int pthread_mutex_lock(pthread_mutex_t *_mutex){
+        // Acquire the owner lock stored in *_mutex through lock().
+        // Returns EINVAL for a NULL mutex, otherwise 0.
+        if (_mutex == NULL) return EINVAL;
+
+        mutex_check(_mutex);            // initializes the mutex first if its __lock word is still zero
+        lock(*get(_mutex));
+        return 0;
+    }   //pthread_mutex_lock_
+
+    int pthread_mutex_unlock(pthread_mutex_t *_mutex){
+        // Release the owner lock stored in *_mutex.
+        // Returns EINVAL for a NULL mutex, EPERM when the caller is not the owner, 0 otherwise.
+        if (_mutex == NULL) return EINVAL;
+
+        simple_owner_lock* held = get(_mutex);
+        if (held->owner != active_thread()) return EPERM;
+
+        unlock(*held);
+        return 0;
+    }   //pthread_mutex_unlock_
+
+    int pthread_mutex_trylock(pthread_mutex_t *_mutex){ // EINVAL for NULL, EBUSY when another thread owns the mutex, else lock it and return 0
+        if (_mutex == NULL) return EINVAL;  // if mutex invalid
+        mutex_check(_mutex);                // initialize a statically-initialized mutex first, exactly as pthread_mutex_lock does
+        simple_owner_lock* _lock = get(_mutex);
+        // NOTE(review): the owner test and lock() are separate steps, so another thread may take the mutex in between and this call can then block - confirm whether simple_owner_lock offers a non-blocking acquire
+        if (_lock->owner != active_thread() && _lock->owner != NULL){
+            return EBUSY;
+        }   // if mutex is owned
+        lock(*_lock);
+        return 0;
+    }   //pthread_mutex_trylock_
+
+    //######################### Conditional Variable #########################
+
+    /* conditional variable routines */
+    int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr){ // attr is accepted but not used
+        if (cond == NULL) return EINVAL;
+        init(cond);     // construct the CFA condition variable inside the caller's storage
+        return 0;
+    }
+    int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *_mutex){
+        // EINVAL when either argument is NULL; otherwise wait on cond with _mutex and return 0
+        if (cond == NULL) return EINVAL;
+        if (_mutex == NULL) return EINVAL;
+        wait(*get(cond), *get(_mutex));
+        return 0;
+    }
+
+    int pthread_cond_timedwait(pthread_cond_t * cond, pthread_mutex_t * _mutex, const struct timespec * abstime){ // wait on cond with _mutex, bounded by *abstime
+        if (cond == NULL || _mutex == NULL || abstime == NULL){
+            return EINVAL;
+        }   // invalid argument: abstime is dereferenced below, so it is checked too
+        wait(*get(cond), *get(_mutex), *abstime); // NOTE(review): result ignored, so a timeout is reported as success (POSIX expects ETIMEDOUT) - confirm what wait() returns
+        return 0;
+    }
+
+
+    int pthread_cond_signal(pthread_cond_t *cond){  // wake one waiter on cond; EINVAL for NULL, otherwise 0
+        if (cond == NULL) return EINVAL;    // invalid cond
+        // POSIX reports success as 0; notify_one's result (presumably whether a waiter was woken - verify in locks.hfa) must not leak out as an error code
+        (void)notify_one(*get(cond));
+        return 0;
+    }
+    int pthread_cond_broadcast(pthread_cond_t *cond){ // wake every waiter on cond; EINVAL for NULL, otherwise 0
+        if (cond == NULL) return EINVAL;    // invalid cond
+        // POSIX reports success as 0; notify_all's result (presumably whether any waiter was woken - verify in locks.hfa) must not leak out as an error code
+        (void)notify_all(*get(cond));
+        return 0;
+    }
+    int pthread_cond_destroy(pthread_cond_t *cond){
+        // EINVAL for a NULL cond; otherwise run the destructor of the CFA condition variable stored in *cond
+        if (cond == NULL) return EINVAL;
+
+        ^?{}(*get(cond));
+        return 0;
+    }
+
+
+
+    //######################### Local storage #########################
+
+    int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)){ // run init_routine the first time this once_control is seen; always 0
+        lock(once_lock);                    // one file-wide lock covers every once_control
+        if ( *((int *)once_control) == 0 ) { // once_control is read as a plain int flag: 0 = not run yet
+            init_routine();                 // runs while once_lock is held
+            *((int *)once_control) = 1;
+	    } // if
+        unlock(once_lock);
+        return 0;
+    }
+
+    int pthread_key_create( pthread_key_t *key, void (*destructor)( void * ) ){ // claim the first free key slot; EAGAIN when all are taken
+        lock(key_lock);
+        for ( int i = 0; i < PTHREAD_KEYS_MAX; i += 1 ) {
+            if ( ! cfa_pthread_keys[i].in_use ) {
+                cfa_pthread_keys[i].in_use = true;
+                cfa_pthread_keys[i].destructor = destructor;
+                unlock( key_lock );
+                *key = i;                   // the key value is the slot index
+                return 0;
+            } // if
+        } // for
+        unlock(key_lock);
+        return EAGAIN;                      // no free slot among PTHREAD_KEYS_MAX
+    }   // pthread_key_create
+
+    int pthread_key_delete( pthread_key_t key ){    // release a key slot; EINVAL when key is out of range or not in use
+        lock(key_lock);
+        if ( key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use ) {
+            unlock( key_lock );
+            return EINVAL;
+        } // if
+        cfa_pthread_keys[key].in_use = false;
+        cfa_pthread_keys[key].destructor = NULL;
+
+        // Remove key from all threads with a value; the values themselves are not passed to any destructor here.
+        Pthread_values& p;
+        Sequence(Pthread_values)& head = cfa_pthread_keys[key].threads;
+        for ( SeqIter(Pthread_values) iter = { head }; iter | p; ) {
+            remove(head, p);
+            p.in_use = false;               // the owning thread's slot no longer counts as set
+        }
+        unlock(key_lock);
+        return 0;
+    }   // pthread_key_delete
+
+    int pthread_setspecific( pthread_key_t key, const void *value ){ // store value under key for the calling thread; EINVAL for a bad key
+        // get current thread
+        cfaPthread* t = lookup(pthread_self());
+        // if current thread's pthreadData is NULL; initialize it
+        Pthread_values* values;
+        if (t->pthreadData == NULL){
+            values = anew( PTHREAD_KEYS_MAX);   // one slot per possible key; allocated before the key is validated
+            t->pthreadData = values;
+            for (int i = 0;i < PTHREAD_KEYS_MAX; i++){
+                t->pthreadData[i].in_use = false;
+            }   // for
+        }   else {
+            values = t->pthreadData;
+        }   // if
+        // find corresponding key and set value
+        lock(key_lock);
+        // if invalid key
+        if ( key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use ) {
+            unlock( key_lock );
+            return EINVAL;
+        } // if
+        Pthread_values &entry = values[key];
+        if ( ! entry.in_use ) {
+            entry.in_use = true;
+            add(cfa_pthread_keys[key].threads, entry);  // register this thread's slot on the key's list
+        } // if
+        entry.value = (void *)value;        // cast drops const, matching the void* the getter returns
+        unlock(key_lock);
+        return 0;
+    } //pthread_setspecific
+
+    void* pthread_getspecific(pthread_key_t key){   // value stored under key by the calling thread, or NULL when there is none
+        if (key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use) return NULL; // this first check runs without key_lock
+
+        // get current thread
+        cfaPthread* t = lookup(pthread_self());
+        if (t->pthreadData == NULL) return NULL;    // this thread never stored anything
+        lock(key_lock);
+        Pthread_values &entry = ((Pthread_values *)t->pthreadData)[key];
+        if ( ! entry.in_use ) {
+            unlock( key_lock );
+            return NULL;
+        } // if
+	    void *value = entry.value;
+        unlock(key_lock);
+
+        return value;
+    }   //pthread_get_specific
+
+    //######################### Parallelism #########################
+    void pthread_delete_kernel_threads_() {	// see uMain::~uMain
+        Pthread_kernel_threads& p;
+        for ( StackIter(Pthread_kernel_threads) iter = {cfa_pthreads_kernel_threads}; iter | p; ) {
+            delete(&p);     // delete every node on the stack; NOTE(review): nodes are not popped and concurrency_lock is not taken - confirm callers
+        } // for
+    } // pthread_delete_kernel_threads_
+
+    int pthread_getconcurrency( void ) {	// XOPEN extension
+	    return cfa_pthreads_kernel_threads_zero ? 0 : cfa_pthreads_no_kernel_threads; // 0 if the last request was 0, else the current count
+    } // pthread_getconcurrency
+
+    int pthread_setconcurrency( int new_level ) { // XOPEN extension
+      if ( new_level < 0 ) return EINVAL;
+      if ( new_level == 0 ) {
+        cfa_pthreads_kernel_threads_zero = true;	// remember set to zero, but ignore
+        return 0;					// do not do kernel thread management
+      } // exit
+      cfa_pthreads_kernel_threads_zero = false;
+      lock( concurrency_lock );     // only the count adjustment below runs under the lock
+      for ( ; new_level > cfa_pthreads_no_kernel_threads; cfa_pthreads_no_kernel_threads += 1 ) { // add processors ?
+        push(cfa_pthreads_kernel_threads, *new() );     // allocate one node per missing level
+      } // for
+      for ( ; new_level < cfa_pthreads_no_kernel_threads; cfa_pthreads_no_kernel_threads -= 1 ) { // remove processors ?
+        delete(&pop(cfa_pthreads_kernel_threads));      // drop the most recently added node
+      } // for
+      unlock( concurrency_lock );
+      return 0;
+    } // pthread_setconcurrency
+
+    //######################### Scheduling #########################
+
+
+    int pthread_setschedparam( pthread_t /* thread */, int /* policy */, const struct sched_param * /* param */ ) __THROW {
+	abort( "pthread_setschedparam : not implemented" );    // not implemented: every call aborts
+	return 0;
+    } // pthread_setschedparam
+
+    int pthread_getschedparam( pthread_t /* thread */, int */* policy */, struct sched_param * /* param */ ) __THROW {
+	abort( "pthread_getschedparam : not implemented" );    // not implemented: every call aborts
+	return 0;
+    } // pthread_getschedparam
+
+     //######################### Mutex Attr #########################
+
+    int pthread_mutexattr_init( pthread_mutexattr_t * /* attr */ ) __THROW {
+	return 0;	// no-op stub: attr is left untouched and success is reported
+    } // pthread_mutexattr_init
+
+    int pthread_mutexattr_destroy( pthread_mutexattr_t * /* attr */ ) __THROW {
+	return 0;	// no-op stub: attr is left untouched and success is reported
+    } // pthread_mutexattr_destroy
+
+    int pthread_mutexattr_setpshared( pthread_mutexattr_t * /* attr */, int /* pshared */ ) __THROW {
+	return 0;	// no-op stub: the request is ignored and success is reported
+    } // pthread_mutexattr_setpshared
+
+    int pthread_mutexattr_getpshared( const pthread_mutexattr_t * /* attr */, int * /* pshared */ ) __THROW {
+	return 0;	// no-op stub; NOTE(review): the pshared output is never written
+    } // pthread_mutexattr_getpshared
+
+    int pthread_mutexattr_setprotocol( pthread_mutexattr_t * /* attr */, int /* protocol */ ) __THROW {
+	return 0;	// no-op stub: the request is ignored and success is reported
+    } // pthread_mutexattr_setprotocol
+
+    int pthread_mutexattr_getprotocol( const pthread_mutexattr_t * /* attr */, int * /* protocol */ ) __THROW {
+	return 0;	// no-op stub; NOTE(review): the protocol output is never written
+    } // pthread_mutexattr_getprotocol
+
+    int pthread_mutexattr_setprioceiling( pthread_mutexattr_t * /* attr */, int /* prioceiling */ ) __THROW {
+	return 0;	// no-op stub: the request is ignored and success is reported
+    } // pthread_mutexattr_setprioceiling
+
+    int pthread_mutexattr_getprioceiling( const pthread_mutexattr_t * /* attr */, int * /* ceiling */ ) __THROW {
+	return 0;	// no-op stub; NOTE(review): the ceiling output is never written
+    } // pthread_mutexattr_getprioceiling
+
+    int pthread_mutex_setprioceiling( pthread_mutex_t * /* mutex */, int /* prioceiling */, int * /* old_ceiling */ ) __THROW {
+	return 0;	// no-op stub; NOTE(review): the old_ceiling output is never written
+    } // pthread_mutex_setprioceiling
+
+    int pthread_mutex_getprioceiling( const pthread_mutex_t * /* mutex */, int * /* ceiling */ ) __THROW {
+	return 0;	// no-op stub; NOTE(review): the ceiling output is never written
+    } // pthread_mutex_getprioceiling
+
+    int pthread_mutexattr_gettype( __const pthread_mutexattr_t * __restrict /* __attr */, int * __restrict /* __kind */ ) __THROW {
+	return 0;	// no-op stub; NOTE(review): the __kind output is never written
+    } // pthread_mutexattr_gettype
+
+    int pthread_mutexattr_settype( pthread_mutexattr_t * /* __attr */, int /* __kind */ ) __THROW {
+	return 0;	// no-op stub: the requested kind is ignored and success is reported
+    } // pthread_mutexattr_settype
+
+    //######################### Mutex #########################
+
+    int pthread_mutex_timedlock( pthread_mutex_t *__restrict /* __mutex */, __const struct timespec *__restrict /* __abstime */ ) __THROW {
+	    abort( "pthread_mutex_timedlock" );     // not implemented: every call aborts with the routine's name
+    } // pthread_mutex_timedlock
+
+    //######################### Condition #########################
+
+    int pthread_condattr_getclock( __const pthread_condattr_t * __restrict /* __attr */, __clockid_t *__restrict /* __clock_id */ ) __THROW {
+	    abort( "pthread_condattr_getclock" );   // not implemented: every call aborts with the routine's name
+    } // pthread_condattr_getclock
+
+    int pthread_condattr_setclock( pthread_condattr_t * /* __attr */, __clockid_t /* __clock_id */ ) __THROW {
+	    abort( "pthread_condattr_setclock" );   // not implemented: every call aborts with the routine's name
+    } // pthread_condattr_setclock
+
+    //######################### Spinlock #########################
+
+    int pthread_spin_init( pthread_spinlock_t * /* __lock */, int /*__pshared */ ) __THROW {
+	    abort( "pthread_spin_init" );           // not implemented: every call aborts with the routine's name
+    } // pthread_spin_init
+
+    int pthread_spin_destroy( pthread_spinlock_t * /* __lock */ ) __THROW {
+	    abort( "pthread_spin_destroy" );        // not implemented: every call aborts with the routine's name
+    } // pthread_spin_destroy
+
+    int pthread_spin_lock( pthread_spinlock_t * /* __lock */ ) __THROW {
+	    abort( "pthread_spin_lock" );           // not implemented: every call aborts with the routine's name
+    } // pthread_spin_lock
+
+    int pthread_spin_trylock( pthread_spinlock_t * /* __lock */ ) __THROW {
+	    abort( "pthread_spin_trylock" );        // not implemented: every call aborts with the routine's name
+    } // pthread_spin_trylock
+
+    int pthread_spin_unlock( pthread_spinlock_t * /* __lock */ ) __THROW {
+	    abort( "pthread_spin_unlock" );         // not implemented: every call aborts with the routine's name
+    } // pthread_spin_unlock
+
+    //######################### Barrier #########################
+
+    int pthread_barrier_init( pthread_barrier_t *__restrict /* __barrier */, __const pthread_barrierattr_t *__restrict /* __attr */, unsigned int /* __count */ ) __THROW {
+	    abort( "pthread_barrier_init" );        // not implemented: every call aborts with the routine's name
+    } // pthread_barrier_init
+
+    int pthread_barrier_destroy( pthread_barrier_t * /* __barrier */ ) __THROW {
+	    abort( "pthread_barrier_destroy" );     // not implemented: every call aborts with the routine's name
+    } // pthread_barrier_destroy
+
+    int pthread_barrier_wait( pthread_barrier_t * /* __barrier */ ) __THROW {
+	    abort( "pthread_barrier_wait" );        // not implemented: every call aborts with the routine's name
+    } // pthread_barrier_wait
+
+    int pthread_barrierattr_init( pthread_barrierattr_t * /* __attr */ ) __THROW {
+	    abort( "pthread_barrierattr_init" ); // barriers are unsupported here: any call aborts, reporting the routine name
+    } // pthread_barrierattr_init
+
+    int pthread_barrierattr_destroy( pthread_barrierattr_t * /* __attr */ ) __THROW {
+	    abort( "pthread_barrierattr_destroy" ); // barriers are unsupported here: any call aborts, reporting the routine name
+    } // pthread_barrierattr_destroy
+
+    int pthread_barrierattr_getpshared( __const pthread_barrierattr_t * __restrict /* __attr */, int *__restrict /* __pshared */ ) __THROW {
+	    abort( "pthread_barrierattr_getpshared" ); // barriers are unsupported here: any call aborts, reporting the routine name
+    } // pthread_barrierattr_getpshared
+
+    int pthread_barrierattr_setpshared( pthread_barrierattr_t * /* __attr */, int /* __pshared */ ) __THROW {
+	    abort( "pthread_barrierattr_setpshared" ); // barriers are unsupported here: any call aborts, reporting the routine name
+    } // pthread_barrierattr_setpshared
+
+    //######################### Clock #########################
+
+    int pthread_getcpuclockid( pthread_t /* __thread_id */, __clockid_t * /* __clock_id */ ) __THROW {
+	    abort( "pthread_getcpuclockid" ); // unsupported stub: any call aborts, reporting the routine name
+    } // pthread_getcpuclockid
+
+    // pthread_atfork()
+
+// UNIX98
+
+    //######################### Read/Write #########################
+
+    int pthread_rwlock_init( pthread_rwlock_t *__restrict /* __rwlock */, __const pthread_rwlockattr_t *__restrict /* __attr */ ) __THROW {
+	    abort( "pthread_rwlock_init" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_init
+
+    int pthread_rwlock_destroy( pthread_rwlock_t * /* __rwlock */ ) __THROW {
+	    abort( "pthread_rwlock_destroy" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_destroy
+
+    int pthread_rwlock_rdlock( pthread_rwlock_t * /* __rwlock */ ) __THROW {
+	    abort( "pthread_rwlock_rdlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_rdlock
+
+    int pthread_rwlock_tryrdlock( pthread_rwlock_t * /* __rwlock */ ) __THROW {
+	    abort( "pthread_rwlock_tryrdlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_tryrdlock
+
+    int pthread_rwlock_wrlock( pthread_rwlock_t * /* __rwlock */ ) __THROW {
+	    abort( "pthread_rwlock_wrlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_wrlock
+
+    int pthread_rwlock_trywrlock( pthread_rwlock_t * /* __rwlock */ ) __THROW {
+	    abort( "pthread_rwlock_trywrlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_trywrlock
+
+    int pthread_rwlock_unlock( pthread_rwlock_t * /* __rwlock */ ) __THROW {
+	    abort( "pthread_rwlock_unlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_unlock
+
+    int pthread_rwlockattr_init( pthread_rwlockattr_t * /* __attr */ ) __THROW {
+	    abort( "pthread_rwlockattr_init" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlockattr_init
+
+    int pthread_rwlockattr_destroy( pthread_rwlockattr_t * /*__attr */ ) __THROW {
+	    abort( "pthread_rwlockattr_destroy" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlockattr_destroy
+
+    int pthread_rwlockattr_getpshared( __const pthread_rwlockattr_t * __restrict /* __attr */, int *__restrict /* __pshared */ ) __THROW {
+	    abort( "pthread_rwlockattr_getpshared" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlockattr_getpshared
+
+    int pthread_rwlockattr_setpshared( pthread_rwlockattr_t * /* __attr */, int /* __pshared */ ) __THROW {
+	    abort( "pthread_rwlockattr_setpshared" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlockattr_setpshared
+
+    int pthread_rwlockattr_getkind_np( __const pthread_rwlockattr_t * /* __attr */, int * /* __pref */ ) __THROW {
+	    abort( "pthread_rwlockattr_getkind_np" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlockattr_getkind_np
+
+    int pthread_rwlockattr_setkind_np( pthread_rwlockattr_t * /* __attr */, int /* __pref */ ) __THROW {
+	    abort( "pthread_rwlockattr_setkind_np" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlockattr_setkind_np
+
+// UNIX98 + XOPEN
+
+    int pthread_rwlock_timedrdlock( pthread_rwlock_t *__restrict  /* __rwlock */, __const struct timespec *__restrict /* __abstime */ ) __THROW {
+	    abort( "pthread_rwlock_timedrdlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_timedrdlock
+
+    int pthread_rwlock_timedwrlock( pthread_rwlock_t *__restrict  /* __rwlock */, __const struct timespec *__restrict /* __abstime */ ) __THROW {
+	    abort( "pthread_rwlock_timedwrlock" ); // read/write locks are unsupported here: any call aborts, reporting the routine name
+    } // pthread_rwlock_timedwrlock
+
+// GNU
+
+    //######################### Parallelism #########################
+
+    int pthread_setaffinity_np( pthread_t /* __th */, size_t /* __cpusetsize */, __const cpu_set_t * /* __cpuset */ ) __THROW {
+	    abort( "pthread_setaffinity_np" ); // CPU affinity is unsupported here: any call aborts, reporting the routine name
+    } // pthread_setaffinity_np
+
+    int pthread_getaffinity_np( pthread_t /* __th */, size_t /* __cpusetsize */, cpu_set_t * /* __cpuset */ ) __THROW {
+	    abort( "pthread_getaffinity_np" ); // CPU affinity is unsupported here: any call aborts, reporting the routine name
+    } // pthread_getaffinity_np
+
+    int pthread_attr_setaffinity_np( pthread_attr_t * /* __attr */, size_t /* __cpusetsize */, __const cpu_set_t * /* __cpuset */ ) __THROW {
+	    abort( "pthread_attr_setaffinity_np" ); // CPU affinity is unsupported here: any call aborts, reporting the routine name
+    } // pthread_attr_setaffinity_np
+
+    int pthread_attr_getaffinity_np( __const pthread_attr_t * /* __attr */, size_t /* __cpusetsize */, cpu_set_t * /* __cpuset */ ) __THROW {
+	    abort( "pthread_attr_getaffinity_np" ); // CPU affinity is unsupported here: any call aborts, reporting the routine name
+    } // pthread_attr_getaffinity_np
+
+    //######################### Cancellation #########################
+
+    void _pthread_cleanup_push_defer( struct _pthread_cleanup_buffer * /* __buffer */, void( * /* __routine */ )( void * ), void * /* __arg */ ) __THROW {
+	    abort( "_pthread_cleanup_push_defer" ); // unsupported stub: any call aborts, reporting the routine name
+    } // _pthread_cleanup_push_defer
+
+    void _pthread_cleanup_pop_restore( struct _pthread_cleanup_buffer * /* __buffer */, int /* __execute */ ) __THROW {
+	    abort( "_pthread_cleanup_pop_restore" ); // unsupported stub: any call aborts, reporting the routine name
+    } // _pthread_cleanup_pop_restore
+
+    int pthread_cancel( pthread_t threadID ) __THROW {
+        abort("pthread_cancel not implemented"); // cancellation is unsupported: any call aborts, naming the routine exactly like its siblings
+	    return 0; // not reached if abort does not return; keeps the int-returning body well-formed
+    } // pthread_cancel
+
+    int pthread_setcancelstate( int state, int *oldstate ) __THROW {
+        abort("pthread_setcancelstate not implemented"); // cancellation is unsupported: any call aborts; state/oldstate are never read
+	    return 0; // not reached if abort does not return
+    } // pthread_setcancelstate
+
+    int pthread_setcanceltype( int type, int *oldtype ) __THROW {
+        abort("pthread_setcanceltype not implemented"); // cancellation is unsupported: any call aborts; type/oldtype are never read
+	    return 0; // not reached if abort does not return
+    } // pthread_setcanceltype
+}
+#pragma GCC diagnostic pop
+
Index: libcfa/src/interpose.cfa
===================================================================
--- libcfa/src/interpose.cfa	(revision c4c8571a1aaa3100966db2d603b3a32857faebdd)
+++ libcfa/src/interpose.cfa	(revision a7d696f24154f74fedcb586aaa645387836cdff2)
@@ -46,4 +46,5 @@
 
 	static void * library;
+	static void * pthread_library;
 	if ( ! library ) {
 		#if defined( RTLD_NEXT )
@@ -58,4 +59,16 @@
 		#endif
 	} // if
+	if ( ! pthread_library ) {
+		#if defined( RTLD_NEXT )
+			pthread_library = RTLD_NEXT;
+		#else
+			// missing RTLD_NEXT => must hard-code library name, assuming libpthread.so
+			pthread_library = dlopen( "libpthread.so", RTLD_LAZY );
+			error = dlerror();
+			if ( error ) {
+				abort( "interpose_symbol : failed to open libpthread, %s\n", error );
+			}
+		#endif
+	} // if
 
 	union { generic_fptr_t fptr; void * ptr; } originalFunc;
@@ -72,5 +85,11 @@
 
 	error = dlerror();
-	if ( error ) abort( "interpose_symbol : internal error, %s\n", error );
+	if ( error ) {
+		originalFunc.ptr = dlsym( pthread_library, symbol );
+		error = dlerror();
+		if (error){
+			abort( "interpose_symbol : internal error, %s\n", error );
+		}	// if
+	}	// if
 
 	return originalFunc.fptr;
@@ -92,4 +111,21 @@
 	void (* exit)( int ) __attribute__(( __noreturn__ ));
 	void (* abort)( void ) __attribute__(( __noreturn__ ));
+	typeof(pthread_create) pthread_create;
+	typeof(pthread_join) pthread_join;
+	typeof(pthread_self) pthread_self;
+	typeof(pthread_cond_init) pthread_cond_init;
+	typeof(pthread_cond_destroy) pthread_cond_destroy;
+	typeof(pthread_cond_signal) pthread_cond_signal;
+	typeof(pthread_cond_broadcast) pthread_cond_broadcast;
+	typeof(pthread_cond_wait) pthread_cond_wait;
+	typeof(pthread_mutex_init) pthread_mutex_init;
+	typeof(pthread_mutex_lock) pthread_mutex_lock;
+	typeof(pthread_mutex_trylock) pthread_mutex_trylock;
+	typeof(pthread_mutex_unlock) pthread_mutex_unlock;
+	typeof(pthread_mutex_destroy) pthread_mutex_destroy;
+	typeof(pthread_attr_init ) pthread_attr_init;
+	typeof(pthread_attr_setstack ) pthread_attr_setstack;
+	typeof(pthread_attr_destroy) pthread_attr_destroy;
+	typeof(pthread_attr_getstacksize) pthread_attr_getstacksize;
 } __cabi_libc;
 
@@ -107,4 +143,21 @@
 		INTERPOSE_LIBC( abort, version );
 		INTERPOSE_LIBC( exit , version );
+		INTERPOSE_LIBC( pthread_create , version );
+		INTERPOSE_LIBC( pthread_join , version );
+		INTERPOSE_LIBC( pthread_self , version );
+		INTERPOSE_LIBC( pthread_mutex_init , version );
+		INTERPOSE_LIBC( pthread_mutex_lock , version );
+		INTERPOSE_LIBC( pthread_mutex_trylock , version );	// backs real_pthread_mutex_trylock, which calls __cabi_libc.pthread_mutex_trylock
+		INTERPOSE_LIBC( pthread_mutex_unlock , version );
+		INTERPOSE_LIBC( pthread_mutex_destroy , version );
+		INTERPOSE_LIBC( pthread_cond_init , version );
+		INTERPOSE_LIBC( pthread_cond_destroy , version );
+		INTERPOSE_LIBC( pthread_cond_signal , version );
+		INTERPOSE_LIBC( pthread_cond_broadcast , version );
+		INTERPOSE_LIBC( pthread_cond_wait , version );
+		INTERPOSE_LIBC( pthread_attr_init , version );
+		INTERPOSE_LIBC( pthread_attr_destroy , version );
+		INTERPOSE_LIBC( pthread_attr_setstack , version );
+		INTERPOSE_LIBC( pthread_attr_getstacksize , version );
 #pragma GCC diagnostic pop
 
@@ -168,4 +221,60 @@
 		__cabi_libc.exit( status );
 	}
+
+	libcfa_public int real_pthread_create(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg){
+		return __cabi_libc.pthread_create(_thread, attr, start_routine, arg); // forwards all arguments to the pthread_create entry held in __cabi_libc and returns its result
+	}
+
+	libcfa_public int real_pthread_join(pthread_t _thread, void **retval){
+		return __cabi_libc.pthread_join(_thread, retval); // forwards to the pthread_join entry held in __cabi_libc and returns its result
+	}
+
+	libcfa_public pthread_t real_pthread_self(void){
+		return __cabi_libc.pthread_self(); // forwards to the pthread_self entry held in __cabi_libc and returns its result
+	}
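+	/* NOTE(review): the original note says the default mutex attr is PTHREAD_MUTEX_RECURSIVE; POSIX specifies PTHREAD_MUTEX_DEFAULT when attr is NULL - confirm which is intended */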
+	libcfa_public int real_pthread_mutex_init(pthread_mutex_t *_mutex, const pthread_mutexattr_t *attr){
+		return __cabi_libc.pthread_mutex_init(_mutex, attr); // attr is passed through unchanged to the pthread_mutex_init entry held in __cabi_libc
+	}
+	libcfa_public int real_pthread_mutex_destroy(pthread_mutex_t *_mutex){
+		return __cabi_libc.pthread_mutex_destroy(_mutex); // forwards to the pthread_mutex_destroy entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_mutex_lock(pthread_mutex_t *_mutex){
+		return __cabi_libc.pthread_mutex_lock(_mutex); // forwards to the pthread_mutex_lock entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_mutex_unlock(pthread_mutex_t *_mutex){
+		return  __cabi_libc.pthread_mutex_unlock(_mutex); // forwards to the pthread_mutex_unlock entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_mutex_trylock(pthread_mutex_t *_mutex){
+		return __cabi_libc.pthread_mutex_trylock(_mutex); // forwards to the pthread_mutex_trylock entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr){
+		return __cabi_libc.pthread_cond_init(cond, attr); // forwards to the pthread_cond_init entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *_mutex){
+		return __cabi_libc.pthread_cond_wait(cond, _mutex); // forwards to the pthread_cond_wait entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_cond_signal(pthread_cond_t *cond){
+		return __cabi_libc.pthread_cond_signal(cond); // forwards to the pthread_cond_signal entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_cond_broadcast(pthread_cond_t *cond){
+		return __cabi_libc.pthread_cond_broadcast(cond); // forwards to the pthread_cond_broadcast entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_cond_destroy(pthread_cond_t *cond){
+		return __cabi_libc.pthread_cond_destroy(cond); // forwards to the pthread_cond_destroy entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_attr_init(pthread_attr_t *attr){
+		return __cabi_libc.pthread_attr_init(attr); // forwards to the pthread_attr_init entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_attr_destroy(pthread_attr_t *attr){
+		return __cabi_libc.pthread_attr_destroy(attr); // forwards to the pthread_attr_destroy entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int real_pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize ){
+		return __cabi_libc.pthread_attr_setstack(attr, stackaddr, stacksize); // forwards to the pthread_attr_setstack entry held in __cabi_libc and returns its result
+	}
+	libcfa_public int pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize ){ // NOTE(review): unlike the sibling wrappers this has no real_ prefix, so it defines the public pthread_attr_getstacksize name itself - confirm intended
+		return __cabi_libc.pthread_attr_getstacksize(attr, stacksize); // forwards to the pthread_attr_getstacksize entry held in __cabi_libc and returns its result
+	}
+
 }
 
