Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 61d7bec2829dd5abf116ff6e9af14da1d34d58be)
+++ libcfa/src/concurrency/kernel.cfa	(revision b388ee817181b3a403189e26e13ce3bdd381cf21)
@@ -125,12 +125,14 @@
 //-----------------------------------------------------------------------------
 // Kernel storage
-KERNEL_STORAGE(cluster,		mainCluster);
-KERNEL_STORAGE(processor,	mainProcessor);
-KERNEL_STORAGE($thread,	mainThread);
-KERNEL_STORAGE(__stack_t, 	mainThreadCtx);
-
-cluster     * mainCluster;
-processor   * mainProcessor;
-$thread * mainThread;
+KERNEL_STORAGE(cluster,	             mainCluster);
+KERNEL_STORAGE(processor,            mainProcessor);
+KERNEL_STORAGE($thread,	             mainThread);
+KERNEL_STORAGE(__stack_t,            mainThreadCtx);
+KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
+
+cluster              * mainCluster;
+processor            * mainProcessor;
+$thread              * mainThread;
+__scheduler_RWLock_t * __scheduler_lock;
 
 extern "C" {
@@ -262,5 +264,4 @@
 	this.preemption_rate = preemption_rate;
 	ready_queue{};
-	ready_lock{};
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -299,5 +300,5 @@
 	// register the processor unless it's the main thread which is handled in the boot sequence
 	if(this != mainProcessor) {
-		this->id = doregister2(this->cltr, this);
+		this->id = doregister(this);
 		ready_queue_grow( this->cltr );
 	}
@@ -345,5 +346,5 @@
 	if(this != mainProcessor) {
 		ready_queue_shrink( this->cltr );
-		unregister2(this->cltr, this);
+		unregister(this);
 	}
 	else {
@@ -622,9 +623,9 @@
 	if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
 
-	ready_schedule_lock(thrd->curr_cluster, kernelTLS.this_processor);
+	ready_schedule_lock( kernelTLS.this_processor );
 		push( thrd->curr_cluster, thrd );
 
 		__wake_one(thrd->curr_cluster);
-	ready_schedule_unlock(thrd->curr_cluster, kernelTLS.this_processor);
+	ready_schedule_unlock( kernelTLS.this_processor );
 
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
@@ -635,7 +636,7 @@
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
-	ready_schedule_lock(this, kernelTLS.this_processor);
+	ready_schedule_lock( kernelTLS.this_processor );
 		$thread * head = pop( this );
-	ready_schedule_unlock(this, kernelTLS.this_processor);
+	ready_schedule_unlock( kernelTLS.this_processor );
 
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
@@ -749,4 +750,8 @@
 	__cfa_dbg_global_clusters.lock{};
 
+	// Initialize the global scheduler lock
+	__scheduler_lock = (__scheduler_RWLock_t*)&storage___scheduler_lock;
+	(*__scheduler_lock){};
+
 	// Initialize the main cluster
 	mainCluster = (cluster *)&storage_mainCluster;
@@ -793,5 +798,5 @@
 	(*mainProcessor){};
 
-	mainProcessor->id = doregister2(mainCluster, mainProcessor);
+	mainProcessor->id = doregister(mainProcessor);
 
 	//initialize the global state variables
@@ -848,5 +853,5 @@
 	kernel_stop_preemption();
 
-	unregister2(mainCluster, mainProcessor);
+	unregister(mainProcessor);
 
 	// Destroy the main processor and its context in reverse order of construction
@@ -866,4 +871,6 @@
 
 	^(*mainCluster){};
+
+	^(*__scheduler_lock){};
 
 	^(__cfa_dbg_global_clusters.list){};
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 61d7bec2829dd5abf116ff6e9af14da1d34d58be)
+++ libcfa/src/concurrency/kernel.hfa	(revision b388ee817181b3a403189e26e13ce3bdd381cf21)
@@ -125,33 +125,4 @@
 //-----------------------------------------------------------------------------
 // Cluster Tools
-
-// Cells use by the reader writer lock
-// while not generic it only relies on a opaque pointer
-struct __processor_id;
-
-// Reader-Writer lock protecting the ready-queue
-// while this lock is mostly generic some aspects
-// have been hard-coded to for the ready-queue for
-// simplicity and performance
-struct __clusterRWLock_t {
-	// total cachelines allocated
-	unsigned int max;
-
-	// cachelines currently in use
-	volatile unsigned int alloc;
-
-	// cachelines ready to itereate over
-	// (!= to alloc when thread is in second half of doregister)
-	volatile unsigned int ready;
-
-	// writer lock
-	volatile bool lock;
-
-	// data pointer
-	__processor_id * data;
-};
-
-void  ?{}(__clusterRWLock_t & this);
-void ^?{}(__clusterRWLock_t & this);
 
 // Intrusives lanes which are used by the relaxed ready queue
@@ -236,7 +207,4 @@
 // Cluster
 struct cluster {
-	// Ready queue locks
-	__clusterRWLock_t ready_lock;
-
 	// Ready queue for threads
 	__ready_queue_t ready_queue;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 61d7bec2829dd5abf116ff6e9af14da1d34d58be)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision b388ee817181b3a403189e26e13ce3bdd381cf21)
@@ -106,4 +106,6 @@
 // Cluster lock API
 //=======================================================================
+// Cells used by the reader-writer lock
+// while not generic it only relies on an opaque pointer
 struct __attribute__((aligned(64))) __processor_id {
 	processor * volatile handle;
@@ -113,8 +115,8 @@
 // Lock-Free registering/unregistering of threads
 // Register a processor to a given cluster and get its unique id in return
-unsigned doregister2( struct cluster * cltr, struct processor * proc );
+unsigned doregister( struct processor * proc );
 
 // Unregister a processor from a given cluster using its id, getting back the original pointer
-void     unregister2( struct cluster * cltr, struct processor * proc );
+void     unregister( struct processor * proc );
 
 //=======================================================================
@@ -146,7 +148,35 @@
 
 //-----------------------------------------------------------------------
+// Reader-Writer lock protecting the ready-queues
+// while this lock is mostly generic some aspects
+// have been hard-coded for the ready-queue for
+// simplicity and performance
+struct __scheduler_RWLock_t {
+	// total cachelines allocated
+	unsigned int max;
+
+	// cachelines currently in use
+	volatile unsigned int alloc;
+
+	// cachelines ready to iterate over
+	// (!= to alloc when thread is in second half of doregister)
+	volatile unsigned int ready;
+
+	// writer lock
+	volatile bool lock;
+
+	// data pointer
+	__processor_id * data;
+};
+
+void  ?{}(__scheduler_RWLock_t & this);
+void ^?{}(__scheduler_RWLock_t & this);
+
+extern __scheduler_RWLock_t * __scheduler_lock;
+
+//-----------------------------------------------------------------------
 // Reader side : acquire when using the ready queue to schedule but not
 //  creating/destroying queues
-static inline void ready_schedule_lock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
+static inline void ready_schedule_lock( struct processor * proc) with(*__scheduler_lock) {
 	unsigned iproc = proc->id;
 	/*paranoid*/ verify(data[iproc].handle == proc);
@@ -167,5 +197,5 @@
 }
 
-static inline void ready_schedule_unlock( struct cluster * cltr, struct processor * proc) with(cltr->ready_lock) {
+static inline void ready_schedule_unlock( struct processor * proc) with(*__scheduler_lock) {
 	unsigned iproc = proc->id;
 	/*paranoid*/ verify(data[iproc].handle == proc);
@@ -178,7 +208,7 @@
 // Writer side : acquire when changing the ready queue, e.g. adding more
 //  queues or removing them.
-uint_fast32_t ready_mutate_lock( struct cluster & cltr );
-
-void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t /* value returned by lock */ );
+uint_fast32_t ready_mutate_lock( void );
+
+void ready_mutate_unlock( uint_fast32_t /* value returned by lock */ );
 
 //=======================================================================
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 61d7bec2829dd5abf116ff6e9af14da1d34d58be)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision b388ee817181b3a403189e26e13ce3bdd381cf21)
@@ -29,5 +29,5 @@
 // fall back to a magic number
 #ifndef __CFA_MAX_PROCESSORS__
-	#define __CFA_MAX_PROCESSORS__ 128
+	#define __CFA_MAX_PROCESSORS__ 1024
 #endif
 
@@ -57,5 +57,5 @@
 // Cluster wide reader-writer lock
 //=======================================================================
-void  ?{}(__clusterRWLock_t & this) {
+void  ?{}(__scheduler_RWLock_t & this) {
 	this.max   = __max_processors();
 	this.alloc = 0;
@@ -70,5 +70,5 @@
 
 }
-void ^?{}(__clusterRWLock_t & this) {
+void ^?{}(__scheduler_RWLock_t & this) {
 	free(this.data);
 }
@@ -81,6 +81,6 @@
 //=======================================================================
 // Lock-Free registering/unregistering of threads
-unsigned doregister2( struct cluster * cltr, struct processor * proc ) with(cltr->ready_lock) {
-	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p with cluster %p\n", proc, cltr);
+unsigned doregister( struct processor * proc ) with(*__scheduler_lock) {
+	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
 
 	// Step - 1 : check if there is already space in the data
@@ -99,9 +99,9 @@
 	}
 
-	if(max <= alloc) abort("Trying to create more than %ud processors", cltr->ready_lock.max);
+	if(max <= alloc) abort("Trying to create more than %u processors", __scheduler_lock->max);
 
 	// Step - 2 : F&A to get a new spot in the array.
 	uint_fast32_t n = __atomic_fetch_add(&alloc, 1, __ATOMIC_SEQ_CST);
-	if(max <= n) abort("Trying to create more than %ud processors", cltr->ready_lock.max);
+	if(max <= n) abort("Trying to create more than %u processors", __scheduler_lock->max);
 
 	// Step - 3 : Mark space as used and then publish it.
@@ -125,5 +125,5 @@
 }
 
-void unregister2( struct cluster * cltr, struct processor * proc ) with(cltr->ready_lock) {
+void unregister( struct processor * proc ) with(*__scheduler_lock) {
 	unsigned id = proc->id;
 	/*paranoid*/ verify(id < ready);
@@ -137,5 +137,5 @@
 // Writer side : acquire when changing the ready queue, e.g. adding more
 //  queues or removing them.
-uint_fast32_t ready_mutate_lock( struct cluster & cltr ) with(cltr.ready_lock) {
+uint_fast32_t ready_mutate_lock( void ) with(*__scheduler_lock) {
 	// Step 1 : lock global lock
 	// It is needed to avoid processors that register mid Critical-Section
@@ -155,5 +155,5 @@
 }
 
-void ready_mutate_unlock( struct cluster & cltr, uint_fast32_t last_s ) with(cltr.ready_lock) {
+void ready_mutate_unlock( uint_fast32_t last_s ) with(*__scheduler_lock) {
 	// Step 1 : release local locks
 	// This must be done while the global lock is held to avoid
@@ -811,5 +811,5 @@
 void ready_queue_grow  (struct cluster * cltr) {
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
-	uint_fast32_t last_size = ready_mutate_lock( *cltr );
+	uint_fast32_t last_size = ready_mutate_lock();
 
 	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
@@ -858,5 +858,5 @@
 
 	// Unlock the RWlock
-	ready_mutate_unlock( *cltr, last_size );
+	ready_mutate_unlock( last_size );
 }
 
@@ -864,5 +864,5 @@
 void ready_queue_shrink(struct cluster * cltr) {
 	// Lock the RWlock so no-one pushes/pops while we are changing the queue
-	uint_fast32_t last_size = ready_mutate_lock( *cltr );
+	uint_fast32_t last_size = ready_mutate_lock();
 
 	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
@@ -956,5 +956,5 @@
 
 	// Unlock the RWlock
-	ready_mutate_unlock( *cltr, last_size );
+	ready_mutate_unlock( last_size );
 }
 
