Index: benchmark/Makefile.am
===================================================================
--- benchmark/Makefile.am	(revision e73d44905c72c8c7253f4760fc3de0c316343b79)
+++ benchmark/Makefile.am	(revision d4a60ac94898e14a0a0d632c2c6b4f020a8f0625)
@@ -22,5 +22,5 @@
 
 AM_CFLAGS = -O2 -Wall -I$(srcdir) -lrt -pthread
-AM_CFAFLAGS = -quiet -in-tree -nodebug
+AM_CFAFLAGS = -quiet -in-tree -nodebug -std=c++14
 AM_UPPFLAGS = -quiet -nodebug -multi
 
@@ -139,4 +139,7 @@
 	$(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
 
+tls-fetch_add$(EXEEXT): # no prerequisites listed; compiles tls-fetch_add.c with BENCH_N defined (presumably the iteration count read by bench.h)
+	$(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/tls-fetch_add.c
+
 ## =========================================================================================================
 CTXSWITCH_DEPEND  =                 \
@@ -144,4 +147,5 @@
 	function.run			\
 	fetch_add.run			\
+	tls-fetch_add.run			\
 	ctxswitch-pthread.run		\
 	ctxswitch-cfa_coroutine.run	\
Index: benchmark/Makefile.in
===================================================================
--- benchmark/Makefile.in	(revision e73d44905c72c8c7253f4760fc3de0c316343b79)
+++ benchmark/Makefile.in	(revision d4a60ac94898e14a0a0d632c2c6b4f020a8f0625)
@@ -372,5 +372,5 @@
 # applies to both programs
 AM_CFLAGS = -O2 -Wall -I$(srcdir) -lrt -pthread
-AM_CFAFLAGS = -quiet -in-tree -nodebug
+AM_CFAFLAGS = -quiet -in-tree -nodebug -std=c++14
 AM_UPPFLAGS = -quiet -nodebug -multi
 BENCH_V_CC = $(__bench_v_CC_$(__quiet))
@@ -402,9 +402,9 @@
 dummy_SOURCES = dummyC.c dummyCXX.cpp
 CTXSWITCH_DEPEND = loop.run function.run fetch_add.run \
-	ctxswitch-pthread.run ctxswitch-cfa_coroutine.run \
-	ctxswitch-cfa_thread.run ctxswitch-cfa_thread2.run \
-	ctxswitch-upp_coroutine.run ctxswitch-upp_thread.run \
-	ctxswitch-goroutine.run ctxswitch-java_thread.run \
-	$(am__append_1)
+	tls-fetch_add.run ctxswitch-pthread.run \
+	ctxswitch-cfa_coroutine.run ctxswitch-cfa_thread.run \
+	ctxswitch-cfa_thread2.run ctxswitch-upp_coroutine.run \
+	ctxswitch-upp_thread.run ctxswitch-goroutine.run \
+	ctxswitch-java_thread.run $(am__append_1)
 testdir = $(top_srcdir)/tests
 all: all-am
@@ -799,4 +799,7 @@
 	$(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
 
+tls-fetch_add$(EXEEXT): # no prerequisites listed; compiles tls-fetch_add.c with BENCH_N defined (presumably the iteration count read by bench.h)
+	$(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/tls-fetch_add.c
+
 @WITH_LIBFIBRE_TRUE@ctxswitch-kos_fibre$(EXEEXT):
 @WITH_LIBFIBRE_TRUE@	$(BENCH_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre.cpp  -I$(LIBFIBRE_DIR) -lfibre
Index: benchmark/tls-fetch_add.c
===================================================================
--- benchmark/tls-fetch_add.c	(revision d4a60ac94898e14a0a0d632c2c6b4f020a8f0625)
+++ benchmark/tls-fetch_add.c	(revision d4a60ac94898e14a0a0d632c2c6b4f020a8f0625)
@@ -0,0 +1,27 @@
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "bench.h"
+
+#define thread_local _Thread_local /* spell the C11 storage-class keyword the way C++ does */
+// Flag with thread storage duration (each thread has its own copy); do_call() stores true, then false, into it.
+volatile thread_local bool value;
+// Unit under test: stores true, then false, into the thread-local `value`, each store followed by a signal fence; noinline forbids inlining it into the caller.
+void __attribute__((noinline)) do_call() {
+	__atomic_store_n( &value, true, __ATOMIC_RELAXED ); // relaxed: atomic store with no inter-thread ordering requested
+	__atomic_signal_fence(__ATOMIC_ACQUIRE); // GCC builtin: fence between this thread and signal handlers running on it
+	asm volatile (""); // empty asm between the two stores; presumably a stand-in for the guarded work (TODO confirm)
+	__atomic_store_n( &value, false, __ATOMIC_RELAXED );
+	__atomic_signal_fence(__ATOMIC_RELEASE); // NOTE(review): release fence comes after the store, not before; confirm this order is intended
+}
+// Entry point: hands a loop of n do_call() invocations to the BENCH macro (not defined in this file; presumably from bench.h), then prints `result`.
+int main(int argc, char* argv[]) {
+	BENCH(
+		for (size_t i = 0; i < n; i++) {
+			do_call();
+		},
+		result
+	)
+
+	printf("%llu\n", result); // `n` and `result` are not declared in this file; presumably BENCH declares them (result printed as unsigned long long)
+}
