Index: src/libcfa/concurrency/CtxSwitch-x86_64.S
===================================================================
--- src/libcfa/concurrency/CtxSwitch-x86_64.S	(revision dd0b96160826f69b3ebeccd0edfccb59699d00ab)
+++ src/libcfa/concurrency/CtxSwitch-x86_64.S	(revision ffc3b26efde87fa75bb6274c1b7a445bacd98e78)
@@ -49,4 +49,7 @@
 	// Save volatile registers on the stack.
 
+	subq   $8,%rsp          // reserve 8 bytes on the stack for the floating-point control state
+	stmxcsr 0(%rsp)         // store MXCSR (SSE control/status), 4 bytes at offset 0
+	fnstcw  4(%rsp)         // store x87 FPU control word, 2 bytes at offset 4 (last 2 bytes unused)
 	pushq %r15
 	pushq %r14
@@ -72,4 +75,7 @@
 	popq %r14
 	popq %r15
+	fldcw   4(%rsp)         // reload x87 FPU control word from offset 4 of the save area
+	ldmxcsr 0(%rsp)         // reload MXCSR (SSE control/status) from offset 0 of the save area
+	addq $8,%rsp            // release the 8-byte floating-point control save area
 
 	// Return to thread.
Index: src/libcfa/concurrency/invoke.c
===================================================================
--- src/libcfa/concurrency/invoke.c	(revision dd0b96160826f69b3ebeccd0edfccb59699d00ab)
+++ src/libcfa/concurrency/invoke.c	(revision ffc3b26efde87fa75bb6274c1b7a445bacd98e78)
@@ -109,5 +109,7 @@
       struct FakeStack {
             void *fixedRegisters[5];			// fixed registers rbx, r12, r13, r14, r15
-            void *rturn;					// where to go on return from uSwitch
+            uint32_t mxcr;			            // SSE MXCSR register: status and control bits (control bits are preserved across function calls)
+            uint16_t fcw;			            // x87 FPU control word (preserved across function calls); 2 bytes of implicit padding follow to keep rturn pointer-aligned
+            void *rturn;				      // where to go on return from uSwitch
             void *dummyReturn;				// NULL return address to provide proper alignment
       };
@@ -120,4 +122,7 @@
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[0] = this;
       ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fixedRegisters[1] = invoke;
+      // Seed the floating-point control state with the processor defaults: the context switch loads both fields (ldmxcsr/fldcw) the first time this stack is switched to.
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; // MXCSR default: all SSE exceptions masked, round-to-nearest (0 would unmask every exception)
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  // x87 control word default: all exceptions masked, 64-bit precision, round-to-nearest
 #else
       #error Only __i386__ and __x86_64__ is supported for threads in cfa
