Index: benchmark/ctxswitch/cfa_cor_then.cfa
===================================================================
--- benchmark/ctxswitch/cfa_cor_then.cfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
+++ benchmark/ctxswitch/cfa_cor_then.cfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <kernel.hfa>
+#include <thread.hfa>
+
+#include "bench.h"
+
+void noOp(void) {}	// empty callback; main( GreatSuspender & ) passes it to suspend_then
+
+coroutine GreatSuspender {};
+
+void ?{}( GreatSuspender & this ) {	// constructor for GreatSuspender
+	prime(this);	// NOTE(review): presumably runs the coroutine up to its first suspend so the timed loop only sees resume/suspend pairs — confirm prime()'s contract
+}
+
+void main( __attribute__((unused)) GreatSuspender & this ) {	// coroutine body
+	while( true ) {	// no exit: every pass suspends again, handing noOp to suspend_then
+		suspend_then(noOp);
+	}
+}
+
+int main(int argc, char* argv[]) {
+	GreatSuspender s;	// constructed through ?{}, which calls prime(s)
+	// BENCH, n and result are not defined in this file; presumably bench.h supplies them — confirm.
+	BENCH(
+		for (size_t i = 0; i < n; i++) {
+			resume( s );
+		},
+		result
+	)
+
+	printf("%llu\n", result);	// prints result (presumably filled in by BENCH — confirm)
+}
Index: libcfa/src/concurrency/CtxSwitch-x86_64.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
+++ libcfa/src/concurrency/CtxSwitch-x86_64.S	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -40,4 +40,6 @@
 #define FP_OFFSET	( 1 * PTR_BYTE )
 
+//-----------------------------------------------------------------------------
+// Regular context switch routine which enables switching from one context to another
 	.text
 	.align 2
@@ -77,10 +79,66 @@
 	.size  CtxSwitch, .-CtxSwitch
 
-.text
+//-----------------------------------------------------------------------------
+// Part of a 2 part context switch routine, use with CtxRet, stores the current context and then makes a function call
+	.text
 	.align 2
-.globl	CtxInvokeStub
+	.globl CtxStore
+	.type  CtxStore, @function
+CtxStore:
+	// In: %rdi = context area to save into ("from"), %rsi = address of the function to jump to.
+	// Save the callee-saved registers on the stack (CtxRet pops them in the reverse order).
+
+	pushq %r15
+	pushq %r14
+	pushq %r13
+	pushq %r12
+	pushq %rbx
+
+	// Save old context in the "from" area: stack pointer first, then frame pointer.
+
+	movq %rsp,SP_OFFSET(%rdi)
+	movq %rbp,FP_OFFSET(%rdi)
+
+	// Don't load a new context: jump (not call) to the desired function, which keeps running on the current stack.
+	// NOTE(review): %rsp is now 40 bytes below its value on entry; the System V ABI expects %rsp+8 to be 16-byte aligned at function entry — confirm the jump target tolerates this.
+	jmp *%rsi
+	.size  CtxStore, .-CtxStore
+
+//-----------------------------------------------------------------------------
+// Part of a 2 part context switch routine, use with CtxStore, context switches to the desired target without saving the current context
+	.text
+	.align 2
+	.globl CtxRet
+	.type  CtxRet, @function
+CtxRet:
+	// Load the new context from the "to" area (%rdi); the current context is not saved.
+
+	movq SP_OFFSET(%rdi),%rsp
+	movq FP_OFFSET(%rdi),%rbp
+
+	// Restore the callee-saved registers from the new stack, in the reverse of the order CtxStore pushes them.
+
+	popq %rbx
+	popq %r12
+	popq %r13
+	popq %r14
+	popq %r15
+
+	// Return through the address that is now on top of the restored stack.
+
+	ret
+	.size  CtxRet, .-CtxRet
+
+
+//-----------------------------------------------------------------------------
+// Stub used to create new stacks which are ready to be context switched to
+	.text
+	.align 2
+	.globl CtxInvokeStub
+	.type	 CtxInvokeStub, @function
 CtxInvokeStub:
 	movq %rbx, %rdi
 	jmp *%r12
+	.size  CtxInvokeStub, .-CtxInvokeStub
 
 // Local Variables: //
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -68,6 +68,6 @@
 
 	extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
-	// void CtxStore ( void * this ) asm ("CtxStore");
-	// void CtxRet   ( void * dst  ) asm ("CtxRet");
+	extern void CtxStore ( struct __stack_context_t * from, __attribute__((noreturn)) void (*__callback)(void) ) asm ("CtxStore");	// saves the current context into from, then jumps to __callback
+	extern void CtxRet   ( struct __stack_context_t * to ) asm ("CtxRet") __attribute__ ((__noreturn__));	// loads the context in to without saving the current one
 }
 
@@ -172,35 +172,52 @@
 }
 
-
-
-// static inline bool suspend_checkpoint(void) {
-// 	// optimization : read TLS once and reuse it
-// 	// Safety note: this is preemption safe since if
-// 	// preemption occurs after this line, the pointer
-// 	// will also migrate which means this value will
-// 	// stay in syn with the TLS
-// 	// set state of current coroutine to inactive
-//       this->state = Checkpoint;
-
-//       // context switch to specified coroutine
-//       assert( src->stack.context );
-
-//       CtxStore(src->stack.context);
-
-// 	bool ret = this->state == Checkpoint;
-
-//       // set state of new coroutine to active
-//       src->state = Active;
-
-//       enable_interrupts( __cfaabi_dbg_ctx );
-//       // Safety note : This could cause some false positives due to preemption
-//       verify( TL_GET( preemption_state.enabled ) || TL_GET( this_processor )->do_terminate );
-
-//       if( unlikely(src->cancellation != NULL) ) {
-//             _CtxCoroutine_Unwind(src->cancellation);
-//       }
-
-// 	return ret;
-// }
+static inline void suspend_then(fptr_t call) {
+	// Suspend the current coroutine: save its context, run call(), then switch to src->last.
+	// Optimization: read TLS once and reuse it.
+	// Safety note: this is preemption safe since if preemption occurs
+	// after this line, the pointer will also migrate, which means
+	// this value will stay in sync with the TLS.
+	coroutine_desc * src = TL_GET( this_thread )->curr_cor;
+
+	assertf( src->last != 0,
+		"Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
+		"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
+		src->name, src );
+	assertf( src->last->state != Halted,
+		"Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
+		"Possible cause is terminated coroutine's main routine has already returned.",
+		src->name, src, src->last->name, src->last );
+
+	src->state = PreInactive;	// the context is about to be stored; the callback below has not run yet
+
+      // sanity check: this coroutine's context must already hold a stack pointer
+      assert( src->context.SP );
+	// Entered by a jump from CtxStore rather than a call, so it has no caller to return to: it leaves through CtxRet.
+	__attribute__((noreturn)) void __suspend_callback(void) {
+		call();
+
+		// mark the coroutine inactive, unless its state has become Halted in the meantime
+		src->state = src->state == Halted ? Halted : Inactive;
+
+		TL_GET( this_thread )->curr_cor = src->last;	// the coroutine being suspended back to becomes the current one
+
+		// load the context of src->last without saving the current one
+		assert( src->last->context.SP );
+		CtxRet( &src->last->context );
+
+		abort();	// not expected to be reached: CtxRet is declared noreturn
+	}
+      CtxStore( &src->context, __suspend_callback );
+	// CtxStore only appears to return once something switches back to src->context; we are then back in the src coroutine
+
+	// this coroutine is running again: mark it active
+	src->state = Active;
+
+	if( unlikely(src->cancellation != NULL) ) {	// NOTE(review): presumably a pending cancellation unwinds this coroutine here — confirm _CtxCoroutine_Unwind's contract
+		_CtxCoroutine_Unwind(src->cancellation, src);
+	}
+
+	return;
+}
 
 // static inline void suspend_return(void) {
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 673cd637ca3377db61b5ca17aa0ec910e12ae819)
+++ libcfa/src/concurrency/invoke.h	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -93,5 +93,5 @@
 	};
 
-	enum coroutine_state { Halted, Start, Inactive, Active, Primed };
+	enum coroutine_state { Halted, Start, Inactive, Active, Primed, PreInactive };	// PreInactive: set by suspend_then before storing the context; its callback then moves the coroutine to Inactive
 
 	struct coroutine_desc {
Index: tests/concurrent/coroutineThen.cfa
===================================================================
--- tests/concurrent/coroutineThen.cfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
+++ tests/concurrent/coroutineThen.cfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -0,0 +1,94 @@
+#include <fstream.hfa>
+#include <kernel.hfa>
+#include <stdlib.hfa>
+#include <thread.hfa>
+#include <monitor.hfa>
+#include <time.hfa>
+
+#define __kick_rate 150000ul
+#include "long_tests.hfa"
+
+#ifndef PREEMPTION_RATE
+#define PREEMPTION_RATE 10`ms
+#endif
+
+Duration default_preemption() {	// NOTE(review): presumably read by the runtime to set the preemption period — confirm
+	return PREEMPTION_RATE;	// 10`ms unless PREEMPTION_RATE was defined before this point
+}
+
+#ifdef TEST_LONG
+static const unsigned long N = 600_000ul;
+#else
+static const unsigned long N = 1_000ul;
+#endif
+
+monitor Printer {};
+#if !defined(TEST_FOREVER)
+	static inline void print(Printer & mutex this, const char * const text ) {	// this is taken as a mutex (monitor) parameter
+		sout | text;
+	}
+#else
+	static inline void print(Printer & this, const char * const text ) {}	// TEST_FOREVER build: printing is a no-op
+#endif
+Printer printer;
+
+coroutine Coroutine {};
+
+volatile bool done = false;
+Coroutine * volatile the_cor = 0p;
+
+void store(Coroutine * volatile * target, Coroutine * value) {	// atomically write value into *target
+	long long int val = value;	// the pointer is handled as a long long int for the atomic builtin
+	volatile long long int * ptr = target;
+	__atomic_store(ptr, &val, __ATOMIC_SEQ_CST);	// generic form of the builtin: the new value is passed by address
+}
+
+Coroutine * exchange(Coroutine * volatile * target) {	// atomically take the pointer stored in *target, leaving 0 behind
+	long long int ret;
+	volatile long long int * ptr = target;	// same integer view of the slot that store() uses
+	long long int val = 0;
+	ret = __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST);	// the _n form takes the new value itself, not its address
+	assert(ret == 0 || *ptr == 0);	// either nothing was taken, or the slot now reads empty
+	return (Coroutine *)ret;
+}
+
+void main(Coroutine& this) {
+	for(int i = 0; TEST(i < N); i++) {
+		// Each pass: suspend, publishing this coroutine in the_cor from the suspend_then callback.
+		print(printer, "Coroutine 1");
+		void publish() {
+			assert(!the_cor);	// the slot must be empty: exchange() clears it before a thread resumes this coroutine
+			store( &the_cor, &this );
+		}
+		suspend_then(publish);
+		print(printer, "Coroutine 2");	// reached once a Thread has resumed this coroutine
+		KICK_WATCHDOG;
+		yield();
+	}
+	done = true;	// makes the while(!done) loops in main( Thread & ) stop
+}
+
+thread Thread {};
+void main(Thread & this) {
+	Coroutine * mine = 0p;
+	while(!done) {	// done is set at the end of main( Coroutine & )
+		yield();
+
+		mine = exchange( &the_cor );	// try to claim whatever coroutine is currently published
+		if(!mine) continue;	// the slot was empty: try again
+
+		print(printer, "Thread 1");
+		resume(*mine);	// run the claimed coroutine until it suspends again
+		print(printer, "Thread 2");
+	}
+}
+
+
+int main(int argc, char* argv[]) {
+	processor p[2];
+	Coroutine c;
+	the_cor = &c;	// publish the coroutine up front so a Thread can make the first resume
+	{
+		Thread t[2];	// NOTE(review): presumably the threads start here and are joined at the closing brace — confirm
+	}
+}
Index: tests/coroutine/.expect/suspend_then.txt
===================================================================
--- tests/coroutine/.expect/suspend_then.txt	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
+++ tests/coroutine/.expect/suspend_then.txt	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -0,0 +1,30 @@
+Then!
+Then!
+0 0
+Then!
+Then!
+1 1
+Then!
+Then!
+1 1
+Then!
+Then!
+2 2
+Then!
+Then!
+3 3
+Then!
+Then!
+5 5
+Then!
+Then!
+8 8
+Then!
+Then!
+13 13
+Then!
+Then!
+21 21
+Then!
+Then!
+34 34
Index: tests/coroutine/suspend_then.cfa
===================================================================
--- tests/coroutine/suspend_then.cfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
+++ tests/coroutine/suspend_then.cfa	(revision 5b11c25668bf48e43c6bf9f0e7c460acb2ca76f5)
@@ -0,0 +1,52 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2019 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// suspend_then.cfa --
+//
+// Author           : Peter A. Buhr
+// Created On       : Mon Apr 29 12:01:35 2019
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include <fstream.hfa>
+#include <coroutine.hfa>
+
+void then() {	// callback handed to every suspend_then in this test
+	sout | "Then!";
+}
+
+coroutine Fibonacci { int fn; };						// used for communication
+
+void main( Fibonacci & fib ) with( fib ) {				// called on first resume
+	int fn1, fn2;								// retained between resumes
+	fn = 0;  fn1 = fn;							// 1st case
+	suspend_then(then);							// then() prints "Then!", then restart last resume
+	fn = 1;  fn2 = fn1;  fn1 = fn;					// 2nd case
+	suspend_then(then);							// then() prints "Then!", then restart last resume
+	for () {
+		fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;			// general case
+		suspend_then(then);						// then() prints "Then!", then restart last resume
+	} // for
+}
+
+int next( Fibonacci & fib ) with( fib ) {
+	resume( fib );								// restart last suspend
+	return fn;								// value the coroutine stored before it suspended
+}
+
+int main() {
+	Fibonacci f1, f2;							// two independent coroutines
+	for ( 10 ) {								// print 10 pairs of Fibonacci values, one from each coroutine
+		sout | next( f1 ) | next( f2 );
+	} // for
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa suspend_then.cfa" //
+// End: //
