Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 8b58baeb5312bd49746f84b760dfbdcb2c8d577e)
+++ libcfa/src/concurrency/kernel.cfa	(revision c34ebf2e1c30febd459338ca6bff481ddda03a67)
@@ -240,4 +240,9 @@
 	runner.proc = &this;
 
+	#if !defined(__CFA_NO_STATISTICS__) // the two flags below are only declared when statistics are compiled in
+		print_stats = false; // statistics reporting starts disabled
+		print_halts = false; // halt tracing starts disabled
+	#endif
+
 	idle{};
 
@@ -293,5 +298,5 @@
 	#if !defined(__CFA_NO_STATISTICS__)
 		if(this.print_stats) {
-			__print_stats( this.stats );
+			__print_stats( this.stats, true, this.name, (void*)&this );
 		}
 		free( this.stats );
@@ -563,4 +568,7 @@
 	#if !defined(__CFA_NO_STATISTICS__)
 		__tally_stats(proc->cltr->stats, &local_stats);
+		if(proc->print_stats) { // report this processor's own counters when the flag was set
+			__print_stats( &local_stats, false, proc->name, (void*)proc ); // false: heading reads "Processor", not "Cluster"
+		}
 	#endif
 
@@ -853,4 +861,9 @@
 		id = -1u;
 
+		#if !defined(__CFA_NO_STATISTICS__) // the two flags below are only declared when statistics are compiled in
+			print_stats = false; // statistics reporting starts disabled
+			print_halts = false; // halt tracing starts disabled
+		#endif
+
 		runner{ &this };
 		__cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
@@ -1004,5 +1017,17 @@
 	}
 
+	#if !defined(__CFA_NO_STATISTICS__)
+		if(this->print_halts) { // NOTE(review): id is assigned -1u elsewhere in this file; confirm %d matches its declared type
+			printf("%d - %lld 0\n", this->id, rdtscl()); // "<id> - <rdtscl() value> 0": emitted just before waiting on idle
+		}
+	#endif
+
 	wait( idle );
+
+	#if !defined(__CFA_NO_STATISTICS__)
+		if(this->print_halts) { // same flag, tested again once the wait has returned
+			printf("%d - %lld 1\n", this->id, rdtscl()); // "<id> - <rdtscl() value> 1": emitted right after wait( idle ) returns
+		}
+	#endif
 }
 
@@ -1190,4 +1215,13 @@
 	return true;
 }
+
+//-----------------------------------------------------------------------------
+// Statistics: print_halts( processor ) turns on halt tracing for that processor
+#if !defined(__CFA_NO_STATISTICS__)
+	void print_halts( processor & this ) { // sets the flag, then prints one line identifying the processor
+		this.print_halts = true; // flag is tested around wait( idle ) to emit the before/after trace lines
+		printf("Processor : %d - %s (%p)\n", this.id, this.name, (void*)&this); // id, name and address of the processor
+	}
+#endif
 // Local Variables: //
 // mode: c //
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 8b58baeb5312bd49746f84b760dfbdcb2c8d577e)
+++ libcfa/src/concurrency/kernel.hfa	(revision c34ebf2e1c30febd459338ca6bff481ddda03a67)
@@ -104,4 +104,9 @@
 	// Link lists fields
 	Link(processor) link;
+
+	#if !defined(__CFA_NO_STATISTICS__)
+		bool print_stats; // when true, __print_stats is called on this processor's local statistics
+		bool print_halts; // when true, a trace line is printed before and after the processor waits on idle
+	#endif
 
 #ifdef __CFA_DEBUG__
@@ -227,4 +232,10 @@
 		this.print_stats = true;
 	}
+
+	static inline void print_stats_at_exit( processor & this ) { // request a statistics report for this processor
+		this.print_stats = true; // only sets the flag; the report itself is produced where the flag is tested
+	}
+
+	void print_halts( processor & this ); // enable halt tracing for this processor (defined in kernel.cfa)
 #endif
 
Index: libcfa/src/concurrency/stats.cfa
===================================================================
--- libcfa/src/concurrency/stats.cfa	(revision 8b58baeb5312bd49746f84b760dfbdcb2c8d577e)
+++ libcfa/src/concurrency/stats.cfa	(revision c34ebf2e1c30febd459338ca6bff481ddda03a67)
@@ -73,5 +73,5 @@
 	}
 
-	void __print_stats( struct __stats_t * stats ) with( *stats ) {
+	void __print_stats( struct __stats_t * stats, bool cluster, const char * name, void * id ) with( *stats ) { // cluster picks the "Cluster"/"Processor" heading; name and id are printed in that heading
 
 		double push_sur = (100.0 * ((double)ready.pick.push.success) / ready.pick.push.attempt);
@@ -108,5 +108,5 @@
 
 		__cfaabi_bits_print_safe( STDOUT_FILENO,
-			"----- Ready Q Stats -----\n"
+			"----- %s \"%s\" (%p) - Ready Q Stats -----\n"
 			"- total threads run      : %'15lu\n"
 			"- total threads scheduled: %'15lu\n"
@@ -121,7 +121,18 @@
 			"-- schedule wake         : %'15lu\n"
 			"-- wake on exit          : %'15lu\n"
-			#if defined(HAVE_LINUX_IO_URING_H)
-				"\n"
-				"----- I/O Stats -----\n"
+			"\n"
+			, cluster ? "Cluster" : "Processor",  name, id
+			, ready.pick.pop.success
+			, ready.pick.push.success
+			, push_len, push_sur, ready.pick.push.attempt
+			, pop_len , pop_sur , ready.pick.pop .attempt
+			, lpush_len, lpush_sur, ready.pick.push.local
+			, lpop_len , lpop_sur , ready.pick.pop .local
+			, ready.threads.migration
+			, ready.sleep.halts, ready.sleep.cancels, ready.sleep.wakes, ready.sleep.exits
+		);
+		#if defined(HAVE_LINUX_IO_URING_H)
+			__cfaabi_bits_print_safe( STDOUT_FILENO,
+				"----- %s \"%s\" (%p) - I/O Stats -----\n"
 				"- total submit calls     : %'15lu\n"
 				"- avg ready entries      : %'18.2lf\n"
@@ -136,29 +147,17 @@
 				"- total wait calls       : %'15lu   (%'lu slow, %'lu fast)\n"
 				"- avg completion/wait    : %'18.2lf\n"
-			#endif
-			, ready.pick.pop.success
-			, ready.pick.push.success
-			, push_len, push_sur, ready.pick.push.attempt
-			, pop_len , pop_sur , ready.pick.pop .attempt
-			, lpush_len, lpush_sur, ready.pick.push.local
-			, lpop_len , lpop_sur , ready.pick.pop .local
-			, ready.threads.migration
-			, ready.sleep.halts, ready.sleep.cancels, ready.sleep.wakes, ready.sleep.exits
-			#if defined(HAVE_LINUX_IO_URING_H)
+				"\n"
+				, cluster ? "Cluster" : "Processor",  name, id
 				, io.submit_q.submit_avg.cnt
-				, avgrdy
-				, avgcsm
-				, avgavl
+				, avgrdy, avgcsm, avgavl
 				, io.submit_q.look_avg.cnt
-				, lavgv
-				, lavgb
+				, lavgv, lavgb
 				, io.submit_q.alloc_avg.cnt
-				, aavgv
-				, aavgb
+				, aavgv, aavgb
 				, io.complete_q.completed_avg.slow_cnt + io.complete_q.completed_avg.fast_cnt
 				, io.complete_q.completed_avg.slow_cnt,  io.complete_q.completed_avg.fast_cnt
 				, ((double)io.complete_q.completed_avg.val) / (io.complete_q.completed_avg.slow_cnt + io.complete_q.completed_avg.fast_cnt)
-			#endif
-		);
+			);
+		#endif
 	}
 #endif
Index: libcfa/src/concurrency/stats.hfa
===================================================================
--- libcfa/src/concurrency/stats.hfa	(revision 8b58baeb5312bd49746f84b760dfbdcb2c8d577e)
+++ libcfa/src/concurrency/stats.hfa	(revision c34ebf2e1c30febd459338ca6bff481ddda03a67)
@@ -7,5 +7,5 @@
 	static inline void __init_stats( struct __stats_t * ) {}
 	static inline void __tally_stats( struct __stats_t *, struct __stats_t * ) {}
-	static inline void __print_stats( struct __stats_t * ) {}
+	static inline void __print_stats( struct __stats_t *, bool, const char *, void * ) {} // no-op stub with the same parameter list as the real declaration
 #else
 	struct __attribute__((aligned(64))) __stats_readQ_t {
@@ -96,5 +96,5 @@
 	void __init_stats ( struct __stats_t * );
 	void __tally_stats( struct __stats_t *, struct __stats_t * );
-	void __print_stats( struct __stats_t * );
+	void __print_stats( struct __stats_t *, bool, const char *, void * ); // parameters are (stats, cluster, name, id) as named in stats.cfa
 #endif
 
