Index: libcfa/src/bits/debug.hfa
===================================================================
--- libcfa/src/bits/debug.hfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/bits/debug.hfa	(revision b7d6a36126af369a09bd3958b2fe17c537999f77)
@@ -37,4 +37,5 @@
 	#include <stdarg.h>
 	#include <stdio.h>
+	#include <unistd.h>
 
 	extern void __cfaabi_bits_write( int fd, const char buffer[], int len );
@@ -49,13 +50,15 @@
 #endif
 
+// #define __CFA_DEBUG_PRINT__
+
 #ifdef __CFA_DEBUG_PRINT__
 	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
 	#define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
 	#define __cfaabi_dbg_release()                    __cfaabi_bits_release()
-	#define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   (__VA_ARGS__)
-	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock (__VA_ARGS__)
-	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer (__VA_ARGS__)
-	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len );
-	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );
+	#define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe  ( STDERR_FILENO, __VA_ARGS__ )
+	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock( STDERR_FILENO, __VA_ARGS__ )
+	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer( STDERR_FILENO, __VA_ARGS__ )
+	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
+	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
 #else
 	#define __cfaabi_dbg_write(...)               ((void)0)
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision e3fea427569271c0c15fd2ca6d3296f213856ead)
+++ libcfa/src/bits/defs.hfa	(revision b7d6a36126af369a09bd3958b2fe17c537999f77)
@@ -54,2 +54,74 @@
     return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
 }
+
+// #define __CFA_NO_BIT_TEST_AND_SET__
+
+#if defined( __i386 )
+static inline bool __atomic_bts(volatile unsigned long int * target, unsigned long int bit ) {
+	#if defined(__CFA_NO_BIT_TEST_AND_SET__)
+        unsigned long int mask = 1ul << bit;
+        unsigned long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
+        return (ret & mask) != 0;
+    #else
+        int result = 0;
+        asm volatile(
+            "LOCK btsl %[bit], %[target]\n\t"
+            : "=@ccc" (result)
+            : [target] "m" (*target), [bit] "r" (bit)
+        );
+        return result != 0;
+    #endif
+}
+
+static inline bool __atomic_btr(volatile unsigned long int * target, unsigned long int bit ) {
+	#if defined(__CFA_NO_BIT_TEST_AND_SET__)
+        unsigned long int mask = 1ul << bit;
+        unsigned long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
+        return (ret & mask) != 0;
+	#else
+        int result = 0;
+        asm volatile(
+            "LOCK btrl %[bit], %[target]\n\t"
+            :"=@ccc" (result)
+            : [target] "m" (*target), [bit] "r" (bit)
+        );
+        return result != 0;
+    #endif
+}
+#elif defined( __x86_64 )
+static inline bool __atomic_bts(volatile unsigned long long int * target, unsigned long long int bit ) {
+	#if defined(__CFA_NO_BIT_TEST_AND_SET__)
+        unsigned long long int mask = 1ul << bit;
+        unsigned long long int ret = __atomic_fetch_or(target, mask, (int)__ATOMIC_RELAXED);
+        return (ret & mask) != 0;
+    #else
+        int result = 0;
+        asm volatile(
+            "LOCK btsq %[bit], %[target]\n\t"
+            : "=@ccc" (result)
+            : [target] "m" (*target), [bit] "r" (bit)
+        );
+        return result != 0;
+    #endif
+}
+
+static inline bool __atomic_btr(volatile unsigned long long int * target, unsigned long long int bit ) {
+	#if defined(__CFA_NO_BIT_TEST_AND_SET__)
+        unsigned long long int mask = 1ul << bit;
+        unsigned long long int ret = __atomic_fetch_and(target, ~mask, (int)__ATOMIC_RELAXED);
+        return (ret & mask) != 0;
+	#else
+        int result = 0;
+        asm volatile(
+            "LOCK btrq %[bit], %[target]\n\t"
+            :"=@ccc" (result)
+            : [target] "m" (*target), [bit] "r" (bit)
+        );
+        return result != 0;
+    #endif
+}
+#elif defined( __ARM_ARCH )
+    #error __atomic_bts and __atomic_btr not implemented for arm
+#else
+	#error uknown hardware architecture
+#endif
