Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision e660761701e94bf2b2ae0130750e911d4122366f)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision 3e2b9c93dff631bddb430d876a83bcc3bafe9d16)
@@ -14,9 +14,11 @@
 //
 
+#pragma once
+
 #include "bits/defs.hfa"
 #include "bits/debug.hfa"
 
-#if !defined(__cforall_thread__)
-#error non-thread source file includes kernel/fwd.hfa
+#ifdef __cforall
+#include "bits/random.hfa"
 #endif
 
@@ -25,7 +27,11 @@
 struct cluster;
 
+enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION }; // reason codes accepted by force_yield(); yield() passes __MANUAL_PREEMPTION
+
+#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)] // declares static raw char buffer storage_X, sized and aligned for one T
+
 #ifdef __cforall
 extern "C" {
-      extern "Cforall" {
+	extern "Cforall" {
 		extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
 			struct $thread    * volatile this_thread;
@@ -45,37 +51,74 @@
 			#endif
 		} kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
+
+		static inline uint64_t __tls_rand() { // 64-bit value produced from kernelTLS.rand_seed by the generator selected below
+			#if defined(__SIZEOF_INT128__) // compiler provides a 128-bit integer type
+				return __lehmer64( kernelTLS.rand_seed ); // presumably also advances the seed - confirm in bits/random.hfa
+			#else // no 128-bit integer type: use the xorshift generator instead
+				return __xorshift64( kernelTLS.rand_seed );
+			#endif
+		}
 	}
 
-      #ifdef __ARM_ARCH
-            // function prototypes are only really used by these macros on ARM
-            void disable_global_interrupts();
-            void enable_global_interrupts();
+	#ifdef __ARM_ARCH
+		// function prototypes are only really used by these macros on ARM
+		void disable_global_interrupts();
+		void enable_global_interrupts();
 
-            #define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
-                  disable_global_interrupts(); \
-                  target = kernelTLS.member; \
-                  enable_global_interrupts(); \
-                  target; } )
-            #define TL_SET( member, value ) disable_global_interrupts(); \
-                  kernelTLS.member = value; \
-                  enable_global_interrupts();
-      #else
-            #define TL_GET( member ) kernelTLS.member
-            #define TL_SET( member, value ) kernelTLS.member = value;
-      #endif
+		#define TL_GET( member ) ( { __typeof__( kernelTLS.member ) target; \
+			disable_global_interrupts(); \
+			target = kernelTLS.member; \
+			enable_global_interrupts(); \
+			target; } ) // GNU statement expression: evaluates to the copy taken between the disable/enable calls
+		#define TL_SET( member, value ) disable_global_interrupts(); \
+			kernelTLS.member = value; \
+			enable_global_interrupts(); // NOTE(review): expands to several statements and already ends in ';' - unsafe as the body of an unbraced if/else
+	#else // __ARM_ARCH not defined: kernelTLS is accessed directly, without the disable/enable calls
+		#define TL_GET( member ) kernelTLS.member
+		#define TL_SET( member, value ) kernelTLS.member = value;
+	#endif // __ARM_ARCH
 
-      extern void disable_interrupts();
-      extern void enable_interrupts_noPoll();
+	extern void disable_interrupts();
+	extern void enable_interrupts_noPoll();
 	extern void enable_interrupts( __cfaabi_dbg_ctx_param );
 
-	enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };
+	extern "Cforall" {
+		extern void park( __cfaabi_dbg_ctx_param );
+		extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+		static inline struct $thread * active_thread () { return TL_GET( this_thread ); } // current value of kernelTLS.this_thread, read through TL_GET
 
-      extern "Cforall" {
-            extern void park( __cfaabi_dbg_ctx_param );
-            extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
-            static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
+		extern bool force_yield( enum __Preemption_Reason );
 
-            extern bool force_yield( enum __Preemption_Reason );
-      }
+		static inline void yield() { // calls force_yield once with the __MANUAL_PREEMPTION reason
+			force_yield(__MANUAL_PREEMPTION); // the bool result of force_yield is discarded
+		}
+
+		// Yield repeatedly: calls yield() once per iteration, `times` iterations in total (none when times is 0)
+		static inline void yield( unsigned times ) {
+			for( times ) { // Cforall counted loop: the body runs `times` times
+				yield();
+			}
+		}
+
+		//-----------------------------------------------------------------------
+		// Statistics helpers: __tls_stats() returns the current thread-local stats block and __STATS__ runs code against it
+		#if !defined(__CFA_NO_STATISTICS__)
+			static inline struct __stats_t * __tls_stats() { // returns kernelTLS.this_stats after the two checks below
+				/* paranoid */ verify( ! kernelTLS.preemption_state.enabled ); // preemption_state.enabled must be false on entry
+				/* paranoid */ verify( kernelTLS.this_stats ); // the stats pointer must be non-null
+				return kernelTLS.this_stats;
+			}
+
+			#define __STATS__(in_kernel, ...) { \
+				if( !(in_kernel) ) disable_interrupts(); \
+				with( *__tls_stats() ) { \
+					__VA_ARGS__ \
+				} \
+				if( !(in_kernel) ) enable_interrupts( __cfaabi_dbg_ctx ); \
+			} // runs the given statements inside with( *__tls_stats() ); when in_kernel is false they are bracketed by disable_interrupts()/enable_interrupts()
+		#else // __CFA_NO_STATISTICS__ defined: __STATS__ expands to nothing
+			#define __STATS__(in_kernel, ...)
+		#endif // !defined(__CFA_NO_STATISTICS__)
+	}
 }
 #endif
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision e660761701e94bf2b2ae0130750e911d4122366f)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 3e2b9c93dff631bddb430d876a83bcc3bafe9d16)
@@ -91,6 +91,4 @@
 //-----------------------------------------------------------------------------
 // Kernel storage
-#warning duplicated in preemption.cfa
-#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
 KERNEL_STORAGE(cluster,	             mainCluster);
 KERNEL_STORAGE(processor,            mainProcessor);
