Index: libcfa/src/concurrency/exception.cfa
===================================================================
--- libcfa/src/concurrency/exception.cfa	(revision 912cc7d77cdec9d2b823fd8bbc0df7ec460af004)
+++ libcfa/src/concurrency/exception.cfa	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -19,4 +19,6 @@
 #include <unwind.h>
 #undef HIDE_EXPORTS
+
+extern void __cfactx_thrd_leave();
 }
 
@@ -52,6 +54,6 @@
 
 STOP_AT_END_FUNCTION(thread_cancelstop,
-	// TODO: Instead pass information to the joiner.
-	abort();
+	__cfactx_thrd_leave(); // end-of-unwind action for a cancelled thread; NOTE(review): presumably does not return in normal operation - confirm
+	__cabi_abort( "Resumed cancelled thread" ); // reached only if control comes back after the leave call above
 )
 
@@ -85,4 +87,6 @@
 		stop_param = (void *)0x22;
 	} else {
+		this_thread->self_cor.cancellation = unwind_exception;
+
 		stop_func = thread_cancelstop;
 		stop_param = this_thread;
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision 912cc7d77cdec9d2b823fd8bbc0df7ec460af004)
+++ libcfa/src/concurrency/monitor.cfa	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -306,15 +306,4 @@
 	/* paranoid */ verify( thrd->state == Halted );
 	unpark( new_owner );
-}
-
-// Join a thread
-forall( dtype T | is_thread(T) )
-T & join( T & this ) {
-	$monitor *    m = get_monitor(this);
-	void (*dtor)(T& mutex this) = ^?{};
-	monitor_dtor_guard_t __guard = { &m, (fptr_t)dtor, true };
-	{
-		return this;
-	}
 }
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 912cc7d77cdec9d2b823fd8bbc0df7ec460af004)
+++ libcfa/src/concurrency/thread.cfa	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -19,4 +19,5 @@
 
 #include "kernel_private.hfa"
+#include "exception.hfa"
 
 #define __CFA_INVOKE_PRIVATE__
@@ -58,4 +59,65 @@
 }
 
+FORALL_DATA_INSTANCE(ThreadCancelled, (dtype thread_t), (thread_t))
+
+forall(dtype T)
+void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src) { // member-wise copy of *src into *dst
+	dst->virtual_table = src->virtual_table;
+	dst->the_thread = src->the_thread;
+	dst->the_exception = src->the_exception; // pointer copy only: dst and src then refer to the same exception object
+}
+
+forall(dtype T)
+const char * msg(ThreadCancelled(T) *) { // generic description string; the argument is not inspected
+	return "ThreadCancelled";
+}
+
+struct __cfaehm_node { // header in front of a stored exception; NOTE(review): presumably must match the exception runtime's own node layout - confirm
+	struct _Unwind_Exception unwind_exception; // first member, so a _Unwind_Exception pointer can be cast to a node pointer
+	struct __cfaehm_node * next;
+	int handler_index;
+};
+
+forall(dtype T)
+static void default_thread_cancel_handler(ThreadCancelled(T) & ) { // fallback the thread_dtor_guard_t constructor installs when it is handed a null handler
+	abort( "Unhandled thread cancellation.\n" );
+}
+
+forall(dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T))) // guard constructor: builds the monitor guard, then reports a recorded cancellation of thrd
+void ?{}( thread_dtor_guard_t & this,
+		T & thrd, void(*defaultResumptionHandler)(ThreadCancelled(T) &)) {
+	$monitor * m = get_monitor(thrd);
+	void (*dtor)(T& mutex this) = ^?{};
+	bool join = defaultResumptionHandler != (void(*)(ThreadCancelled(T)&))0; // a non-null handler marks a join; join() always passes one
+	(this.mg){&m, (void(*)())dtor, join}; // construct the embedded monitor_dtor_guard_t before looking at the cancellation
+	{
+		$thread * desc = get_thread(thrd);
+		struct _Unwind_Exception * cancellation = desc->self_cor.cancellation;
+		if ( likely(0p == cancellation) ) { // expected case: no cancellation recorded on this thread
+			return;
+		} else if ( Cancelled == desc->state ) { // state is already Cancelled; presumably this cancellation was reported before
+			return;
+		}
+		desc->state = Cancelled; // set before the throw below
+		if (!join) { // no handler supplied: use the aborting default
+			defaultResumptionHandler = default_thread_cancel_handler; // NOTE(review): presumably picked up by throwResume when nothing catches - confirm
+		}
+		ThreadCancelled(T) except;
+		// TODO: Remove explicit vtable set once trac#186 is fixed.
+		except.virtual_table = &get_exception_vtable(&except);
+		except.the_thread = &thrd;
+		except.the_exception = (exception_t *)(1 + (__cfaehm_node *)cancellation); // the exception object sits directly after one __cfaehm_node header
+		throwResume except; // resumption: execution continues below once a handler returns
+
+		except.the_exception->virtual_table->free( except.the_exception ); // release the stored exception through its own vtable
+		free( cancellation );
+		desc->self_cor.cancellation = 0p; // cleared so a later guard on this thread takes the early return above
+	}
+}
+
+void ^?{}( thread_dtor_guard_t & this ) { // guard destructor: destroys mg, the guard's only member
+	^(this.mg){};
+}
+
 //-----------------------------------------------------------------------------
 // Starting and stopping threads
@@ -93,4 +155,11 @@
 }
 
+//-----------------------------------------------------------------------------
+forall(dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T))) // NOTE(review): defaultResumptionHandler below presumably comes from this assertion - confirm
+T & join( T & this ) { // joins the thread by building a thread_dtor_guard_t on it, then hands back the same reference
+	thread_dtor_guard_t guard = { this, defaultResumptionHandler }; // the guard constructor raises ThreadCancelled if a cancellation is recorded on the thread
+	return this;
+}
+
 // Local Variables: //
 // mode: c //
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision 912cc7d77cdec9d2b823fd8bbc0df7ec460af004)
+++ libcfa/src/concurrency/thread.hfa	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -22,12 +22,24 @@
 #include "kernel.hfa"
 #include "monitor.hfa"
+#include "exception.hfa"
 
 //-----------------------------------------------------------------------------
 // thread trait
 trait is_thread(dtype T) {
-      void ^?{}(T& mutex this);
-      void main(T& this);
-      $thread* get_thread(T& this);
+	void ^?{}(T& mutex this);
+	void main(T& this);
+	$thread* get_thread(T& this);
 };
+
+FORALL_DATA_EXCEPTION(ThreadCancelled, (dtype thread_t), (thread_t)) ( // exception type parameterised by the thread type it reports on
+	thread_t * the_thread; // the thread whose cancellation is being reported
+	exception_t * the_exception; // the exception carried by that cancellation
+);
+
+forall(dtype T)
+void copy(ThreadCancelled(T) * dst, ThreadCancelled(T) * src);
+
+forall(dtype T)
+const char * msg(ThreadCancelled(T) *);
 
 // define that satisfies the trait without using the thread keyword
@@ -65,4 +77,12 @@
 static inline void ?{}($thread & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
 static inline void ?{}($thread & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
+
+struct thread_dtor_guard_t { // guard built around a thread; its constructor and destructor are declared just below
+	monitor_dtor_guard_t mg; // the wrapped monitor destructor guard
+};
+
+forall( dtype T | is_thread(T) | IS_EXCEPTION(ThreadCancelled, (T)) )
+void ?{}( thread_dtor_guard_t & this, T & thrd, void(*)(ThreadCancelled(T) &) );
+void ^?{}( thread_dtor_guard_t & this );
 
 //-----------------------------------------------------------------------------
@@ -108,5 +128,5 @@
 //----------
 // join
-forall( dtype T | is_thread(T) )
+forall( dtype T | is_thread(T) | IS_RESUMPTION_EXCEPTION(ThreadCancelled, (T)) )
 T & join( T & this );
 
Index: src/Concurrency/Keywords.cc
===================================================================
--- src/Concurrency/Keywords.cc	(revision 912cc7d77cdec9d2b823fd8bbc0df7ec460af004)
+++ src/Concurrency/Keywords.cc	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -46,4 +46,12 @@
 	}
 
+	// True when decl's declarator-stripped type is a struct instance flagged as a thread. Only detects threads constructed with the keyword thread.
+	inline static bool isThread( DeclarationWithType * decl ) {
+		Type * baseType = decl->get_type()->stripDeclarator();
+		StructInstType * instType = dynamic_cast<StructInstType *>( baseType );
+		if ( nullptr == instType ) { return false; } // not a struct instance type, so not reported as a thread
+		return instType->baseStruct->is_thread();
+	}
+
 	//=============================================================================================
 	// Pass declarations
@@ -119,5 +127,5 @@
 			"get_thread",
 			"thread keyword requires threads to be in scope, add #include <thread.hfa>\n",
-			"",
+			"ThreadCancelled",
 			true,
 			AggregateDecl::Thread
@@ -290,6 +298,7 @@
 		std::list<DeclarationWithType*> findMutexArgs( FunctionDecl*, bool & first );
 		void validate( DeclarationWithType * );
-		void addDtorStatments( FunctionDecl* func, CompoundStmt *, const std::list<DeclarationWithType * > &);
-		void addStatments( FunctionDecl* func, CompoundStmt *, const std::list<DeclarationWithType * > &);
+		void addDtorStatements( FunctionDecl* func, CompoundStmt *, const std::list<DeclarationWithType * > &);
+		void addStatements( FunctionDecl* func, CompoundStmt *, const std::list<DeclarationWithType * > &);
+		void addThreadDtorStatements( FunctionDecl* func, CompoundStmt * body, const std::list<DeclarationWithType * > & args );
 
 		static void implement( std::list< Declaration * > & translationUnit ) {
@@ -302,4 +311,5 @@
 		StructDecl* guard_decl = nullptr;
 		StructDecl* dtor_guard_decl = nullptr;
+		StructDecl* thread_guard_decl = nullptr;
 
 		static std::unique_ptr< Type > generic_func;
@@ -801,5 +811,5 @@
 		bool first = false;
 		std::list<DeclarationWithType*> mutexArgs = findMutexArgs( decl, first );
-		bool isDtor = CodeGen::isDestructor( decl->name );
+		bool const isDtor = CodeGen::isDestructor( decl->name );
 
 		// Is this function relevant to monitors
@@ -849,9 +859,15 @@
 
 		// Instrument the body
-		if( isDtor ) {
-			addDtorStatments( decl, body, mutexArgs );
+		if ( isDtor && isThread( mutexArgs.front() ) ) {
+			if( !thread_guard_decl ) {
+				SemanticError( decl, "thread destructor requires threads to be in scope, add #include <thread.hfa>\n" );
+			}
+			addThreadDtorStatements( decl, body, mutexArgs );
+		}
+		else if ( isDtor ) {
+			addDtorStatements( decl, body, mutexArgs );
 		}
 		else {
-			addStatments( decl, body, mutexArgs );
+			addStatements( decl, body, mutexArgs );
 		}
 	}
@@ -870,4 +886,8 @@
 			assert( !dtor_guard_decl );
 			dtor_guard_decl = decl;
+		}
+		else if( decl->name == "thread_dtor_guard_t" && decl->body ) {
+			assert( !thread_guard_decl );
+			thread_guard_decl = decl;
 		}
 	}
@@ -908,5 +928,5 @@
 	}
 
-	void MutexKeyword::addDtorStatments( FunctionDecl* func, CompoundStmt * body, const std::list<DeclarationWithType * > & args ) {
+	void MutexKeyword::addDtorStatements( FunctionDecl* func, CompoundStmt * body, const std::list<DeclarationWithType * > & args ) {
 		Type * arg_type = args.front()->get_type()->clone();
 		arg_type->set_mutex( false );
@@ -957,8 +977,44 @@
 
 		//$monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
-		body->push_front( new DeclStmt( monitors) );
-	}
-
-	void MutexKeyword::addStatments( FunctionDecl* func, CompoundStmt * body, const std::list<DeclarationWithType * > & args ) {
+		body->push_front( new DeclStmt( monitors ) );
+	}
+
+	void MutexKeyword::addThreadDtorStatements( // prepends a thread_dtor_guard_t declaration named __guard to body
+			FunctionDecl*, CompoundStmt * body,
+			const std::list<DeclarationWithType * > & args ) {
+		assert( args.size() == 1 ); // expects exactly one mutex argument
+		DeclarationWithType * arg = args.front();
+		Type * arg_type = arg->get_type()->clone();
+		assert( arg_type->get_mutex() );
+		arg_type->set_mutex( false ); // the cast below targets the argument's type without the mutex qualifier
+
+		// Generated declaration: thread_dtor_guard_t __guard = { this, intptr( 0 ) };
+		body->push_front(
+			new DeclStmt( new ObjectDecl(
+				"__guard",
+				noStorageClasses,
+				LinkageSpec::Cforall,
+				nullptr,
+				new StructInstType(
+					noQualifiers,
+					thread_guard_decl // the thread_dtor_guard_t struct declaration
+				),
+				new ListInit(
+					{
+						new SingleInit( new CastExpr( new VariableExpr( arg ), arg_type ) ), // first initializer: the argument cast to its non-mutex type
+						new SingleInit( new UntypedExpr( // second initializer: the untyped call intptr( 0 ); NOTE(review): presumably resolves to a null handler - confirm
+							new NameExpr( "intptr" ), {
+								new ConstantExpr( Constant::from_int( 0 ) ),
+							}
+						) ),
+					},
+					noDesignators,
+					true
+				)
+			))
+		);
+	}
+
+	void MutexKeyword::addStatements( FunctionDecl* func, CompoundStmt * body, const std::list<DeclarationWithType * > & args ) {
 		ObjectDecl * monitors = new ObjectDecl(
 			"__monitors",
Index: src/GenPoly/Specialize.cc
===================================================================
--- src/GenPoly/Specialize.cc	(revision 912cc7d77cdec9d2b823fd8bbc0df7ec460af004)
+++ src/GenPoly/Specialize.cc	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -321,7 +321,21 @@
 	}
 
+	// Pass that marks every visited function whose name starts with "_thunk" as static. Fold it into Specialize if we find a good way.
+	struct StaticThunks final : public WithShortCircuiting {
+		void previsit( Declaration * ) {
+			visit_children = false; // do not descend below the declaration being visited
+		}
+		void postvisit( FunctionDecl * decl ) {
+			if ( isPrefix( decl->name, "_thunk" ) ) {
+				decl->storageClasses.is_static = true;
+			}
+		}
+	};
+
 	void convertSpecializations( std::list< Declaration* >& translationUnit ) {
 		PassVisitor<Specialize> spec;
 		mutateAll( translationUnit, spec );
+		PassVisitor<StaticThunks> staticThunks; // second pass, run after specialization: flags "_thunk"-prefixed functions as static
+		acceptAll( translationUnit, staticThunks );
 	}
 } // namespace GenPoly
Index: tests/exceptions/cancel/.expect/thread.txt
===================================================================
--- tests/exceptions/cancel/.expect/thread.txt	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
+++ tests/exceptions/cancel/.expect/thread.txt	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -0,0 +1,2 @@
+0112345
+0112345
Index: tests/exceptions/cancel/thread.cfa
===================================================================
--- tests/exceptions/cancel/thread.cfa	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
+++ tests/exceptions/cancel/thread.cfa	(revision ab8c6a6efe9f4120bf5d5eed0b649cad34d89af3)
@@ -0,0 +1,56 @@
+// Try cancelling a thread.
+
+#include <thread.hfa>
+#include <exception.hfa>
+
+TRIVIAL_EXCEPTION(internal_error);
+
+thread WillCancel {};
+
+const char * msg(ThreadCancelled(WillCancel) * this) { // message for the WillCancel instantiation of ThreadCancelled; the argument is unused
+	return "ThreadCancelled(WillCancel)";
+}
+
+void main(WillCancel &) { // thread body: prints "1", then cancels its own stack
+	printf("1");
+	cancel_stack((internal_error){});
+	printf("!"); // must never run: the expected output contains no '!'
+}
+
+void explicit() { // joins a cancelled thread explicitly; the expected output for this case is 0112345
+	try {
+		printf("0");
+		WillCancel cancel;
+		printf("1");
+		join(cancel); // the ThreadCancelled resumption is expected to be raised from inside this call
+		printf("4"); // printed after the handler below returns
+	} catchResume (ThreadCancelled(WillCancel) * error) {
+		printf("2");
+		if ((virtual internal_error *)error->the_exception) { // checks that the carried exception is the internal_error the thread cancelled with
+			printf("3");
+		}
+	}
+	printf("5\n");
+}
+
+void implicit() { // same scenario without join(): the thread is only destroyed at the end of the inner block
+	try {
+		{
+			printf("0");
+			WillCancel cancel;
+			printf("1");
+		} // the ThreadCancelled resumption is expected here, when cancel goes out of scope
+		printf("4");
+	} catchResume (ThreadCancelled(WillCancel) * error) {
+		printf("2");
+		if ((virtual internal_error *)error->the_exception) { // checks that the carried exception is the internal_error the thread cancelled with
+			printf("3");
+		}
+	}
+	printf("5\n");
+}
+
+int main(int argc, char * argv[]) { // runs both cases; each prints one line of the expected output
+	explicit();
+	implicit();
+}
