Index: libcfa/src/concurrency/stats.cfa
===================================================================
--- libcfa/src/concurrency/stats.cfa	(revision ce9ed845f77d05467e8877a11d3be2d3fdde3d1a)
+++ libcfa/src/concurrency/stats.cfa	(revision 45b9b210ba1573f2cbe0ef1dfa968d6a501c6469)
@@ -68,54 +68,64 @@
 	}
 
+	static inline void tally_one( volatile uint64_t * agg, volatile uint64_t * val) { // unsigned overload: drain *val into *agg
+		uint64_t add = __atomic_exchange_n(val, 0_l64u, __ATOMIC_RELAXED); // atomically read *val and reset it to 0 in a single step
+		__atomic_fetch_add(agg, add, __ATOMIC_RELAXED); // atomically add the drained amount to *agg; separate atomic op from the exchange above
+	}
+
+	static inline void tally_one( volatile int64_t * agg, volatile int64_t * val) { // signed overload: drain *val into *agg
+		int64_t add = __atomic_exchange_n(val, 0_l64, __ATOMIC_RELAXED); // atomically read *val and reset it to 0 in a single step
+		__atomic_fetch_add(agg, add, __ATOMIC_RELAXED); // atomically add the drained amount to *agg; separate atomic op from the exchange above
+	}
+
 	void __tally_stats( struct __stats_t * cltr, struct __stats_t * proc ) {
-		__atomic_fetch_add( &cltr->ready.push.local.attempt, proc->ready.push.local.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.local.attempt = 0;
-		__atomic_fetch_add( &cltr->ready.push.local.success, proc->ready.push.local.success, __ATOMIC_SEQ_CST ); proc->ready.push.local.success = 0;
-		__atomic_fetch_add( &cltr->ready.push.share.attempt, proc->ready.push.share.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.share.attempt = 0;
-		__atomic_fetch_add( &cltr->ready.push.share.success, proc->ready.push.share.success, __ATOMIC_SEQ_CST ); proc->ready.push.share.success = 0;
-		__atomic_fetch_add( &cltr->ready.push.extrn.attempt, proc->ready.push.extrn.attempt, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.attempt = 0;
-		__atomic_fetch_add( &cltr->ready.push.extrn.success, proc->ready.push.extrn.success, __ATOMIC_SEQ_CST ); proc->ready.push.extrn.success = 0;
-		__atomic_fetch_add( &cltr->ready.pop.local .attempt, proc->ready.pop.local .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.local .attempt = 0;
-		__atomic_fetch_add( &cltr->ready.pop.local .success, proc->ready.pop.local .success, __ATOMIC_SEQ_CST ); proc->ready.pop.local .success = 0;
-		__atomic_fetch_add( &cltr->ready.pop.local .elock  , proc->ready.pop.local .elock  , __ATOMIC_SEQ_CST ); proc->ready.pop.local .elock   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.local .eempty , proc->ready.pop.local .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.local .eempty  = 0;
-		__atomic_fetch_add( &cltr->ready.pop.local .espec  , proc->ready.pop.local .espec  , __ATOMIC_SEQ_CST ); proc->ready.pop.local .espec   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.help  .attempt, proc->ready.pop.help  .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.help  .attempt = 0;
-		__atomic_fetch_add( &cltr->ready.pop.help  .success, proc->ready.pop.help  .success, __ATOMIC_SEQ_CST ); proc->ready.pop.help  .success = 0;
-		__atomic_fetch_add( &cltr->ready.pop.help  .elock  , proc->ready.pop.help  .elock  , __ATOMIC_SEQ_CST ); proc->ready.pop.help  .elock   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.help  .eempty , proc->ready.pop.help  .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.help  .eempty  = 0;
-		__atomic_fetch_add( &cltr->ready.pop.help  .espec  , proc->ready.pop.help  .espec  , __ATOMIC_SEQ_CST ); proc->ready.pop.help  .espec   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.steal .attempt, proc->ready.pop.steal .attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.steal .attempt = 0;
-		__atomic_fetch_add( &cltr->ready.pop.steal .success, proc->ready.pop.steal .success, __ATOMIC_SEQ_CST ); proc->ready.pop.steal .success = 0;
-		__atomic_fetch_add( &cltr->ready.pop.steal .elock  , proc->ready.pop.steal .elock  , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .elock   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.steal .eempty , proc->ready.pop.steal .eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .eempty  = 0;
-		__atomic_fetch_add( &cltr->ready.pop.steal .espec  , proc->ready.pop.steal .espec  , __ATOMIC_SEQ_CST ); proc->ready.pop.steal .espec   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.search.attempt, proc->ready.pop.search.attempt, __ATOMIC_SEQ_CST ); proc->ready.pop.search.attempt = 0;
-		__atomic_fetch_add( &cltr->ready.pop.search.success, proc->ready.pop.search.success, __ATOMIC_SEQ_CST ); proc->ready.pop.search.success = 0;
-		__atomic_fetch_add( &cltr->ready.pop.search.elock  , proc->ready.pop.search.elock  , __ATOMIC_SEQ_CST ); proc->ready.pop.search.elock   = 0;
-		__atomic_fetch_add( &cltr->ready.pop.search.eempty , proc->ready.pop.search.eempty , __ATOMIC_SEQ_CST ); proc->ready.pop.search.eempty  = 0;
-		__atomic_fetch_add( &cltr->ready.pop.search.espec  , proc->ready.pop.search.espec  , __ATOMIC_SEQ_CST ); proc->ready.pop.search.espec   = 0;
-		__atomic_fetch_add( &cltr->ready.threads.migration , proc->ready.threads.migration , __ATOMIC_SEQ_CST ); proc->ready.threads.migration  = 0;
-		__atomic_fetch_add( &cltr->ready.threads.extunpark , proc->ready.threads.extunpark , __ATOMIC_SEQ_CST ); proc->ready.threads.extunpark  = 0;
-		__atomic_fetch_add( &cltr->ready.threads.threads   , proc->ready.threads.threads   , __ATOMIC_SEQ_CST ); proc->ready.threads.threads    = 0;
-		__atomic_fetch_add( &cltr->ready.sleep.halts       , proc->ready.sleep.halts       , __ATOMIC_SEQ_CST ); proc->ready.sleep.halts        = 0;
-		__atomic_fetch_add( &cltr->ready.sleep.cancels     , proc->ready.sleep.cancels     , __ATOMIC_SEQ_CST ); proc->ready.sleep.cancels      = 0;
-		__atomic_fetch_add( &cltr->ready.sleep.wakes       , proc->ready.sleep.wakes       , __ATOMIC_SEQ_CST ); proc->ready.sleep.wakes        = 0;
-		__atomic_fetch_add( &cltr->ready.sleep.exits       , proc->ready.sleep.exits       , __ATOMIC_SEQ_CST ); proc->ready.sleep.exits        = 0;
+		tally_one( &cltr->ready.push.local.attempt, &proc->ready.push.local.attempt ); // each call adds the proc counter into the matching cltr counter and resets the proc counter to 0
+		tally_one( &cltr->ready.push.local.success, &proc->ready.push.local.success );
+		tally_one( &cltr->ready.push.share.attempt, &proc->ready.push.share.attempt );
+		tally_one( &cltr->ready.push.share.success, &proc->ready.push.share.success );
+		tally_one( &cltr->ready.push.extrn.attempt, &proc->ready.push.extrn.attempt );
+		tally_one( &cltr->ready.push.extrn.success, &proc->ready.push.extrn.success );
+		tally_one( &cltr->ready.pop.local .attempt, &proc->ready.pop.local .attempt );
+		tally_one( &cltr->ready.pop.local .success, &proc->ready.pop.local .success );
+		tally_one( &cltr->ready.pop.local .elock  , &proc->ready.pop.local .elock   );
+		tally_one( &cltr->ready.pop.local .eempty , &proc->ready.pop.local .eempty  );
+		tally_one( &cltr->ready.pop.local .espec  , &proc->ready.pop.local .espec   );
+		tally_one( &cltr->ready.pop.help  .attempt, &proc->ready.pop.help  .attempt );
+		tally_one( &cltr->ready.pop.help  .success, &proc->ready.pop.help  .success );
+		tally_one( &cltr->ready.pop.help  .elock  , &proc->ready.pop.help  .elock   );
+		tally_one( &cltr->ready.pop.help  .eempty , &proc->ready.pop.help  .eempty  );
+		tally_one( &cltr->ready.pop.help  .espec  , &proc->ready.pop.help  .espec   );
+		tally_one( &cltr->ready.pop.steal .attempt, &proc->ready.pop.steal .attempt );
+		tally_one( &cltr->ready.pop.steal .success, &proc->ready.pop.steal .success );
+		tally_one( &cltr->ready.pop.steal .elock  , &proc->ready.pop.steal .elock   );
+		tally_one( &cltr->ready.pop.steal .eempty , &proc->ready.pop.steal .eempty  );
+		tally_one( &cltr->ready.pop.steal .espec  , &proc->ready.pop.steal .espec   );
+		tally_one( &cltr->ready.pop.search.attempt, &proc->ready.pop.search.attempt );
+		tally_one( &cltr->ready.pop.search.success, &proc->ready.pop.search.success );
+		tally_one( &cltr->ready.pop.search.elock  , &proc->ready.pop.search.elock   );
+		tally_one( &cltr->ready.pop.search.eempty , &proc->ready.pop.search.eempty  );
+		tally_one( &cltr->ready.pop.search.espec  , &proc->ready.pop.search.espec   );
+		tally_one( &cltr->ready.threads.migration , &proc->ready.threads.migration  );
+		tally_one( &cltr->ready.threads.extunpark , &proc->ready.threads.extunpark  );
+		tally_one( &cltr->ready.threads.threads   , &proc->ready.threads.threads    );
+		tally_one( &cltr->ready.sleep.halts       , &proc->ready.sleep.halts        );
+		tally_one( &cltr->ready.sleep.cancels     , &proc->ready.sleep.cancels      );
+		tally_one( &cltr->ready.sleep.wakes       , &proc->ready.sleep.wakes        );
+		tally_one( &cltr->ready.sleep.exits       , &proc->ready.sleep.exits        );
 
 		#if defined(CFA_HAVE_LINUX_IO_URING_H)
-			__atomic_fetch_add( &cltr->io.alloc.fast       , proc->io.alloc.fast       , __ATOMIC_SEQ_CST ); proc->io.alloc.fast        = 0;
-			__atomic_fetch_add( &cltr->io.alloc.slow       , proc->io.alloc.slow       , __ATOMIC_SEQ_CST ); proc->io.alloc.slow        = 0;
-			__atomic_fetch_add( &cltr->io.alloc.fail       , proc->io.alloc.fail       , __ATOMIC_SEQ_CST ); proc->io.alloc.fail        = 0;
-			__atomic_fetch_add( &cltr->io.alloc.revoke     , proc->io.alloc.revoke     , __ATOMIC_SEQ_CST ); proc->io.alloc.revoke      = 0;
-			__atomic_fetch_add( &cltr->io.alloc.block      , proc->io.alloc.block      , __ATOMIC_SEQ_CST ); proc->io.alloc.block       = 0;
-			__atomic_fetch_add( &cltr->io.submit.fast      , proc->io.submit.fast      , __ATOMIC_SEQ_CST ); proc->io.submit.fast       = 0;
-			__atomic_fetch_add( &cltr->io.submit.slow      , proc->io.submit.slow      , __ATOMIC_SEQ_CST ); proc->io.submit.slow       = 0;
-			__atomic_fetch_add( &cltr->io.flush.external   , proc->io.flush.external   , __ATOMIC_SEQ_CST ); proc->io.flush.external    = 0;
-			__atomic_fetch_add( &cltr->io.calls.flush      , proc->io.calls.flush      , __ATOMIC_SEQ_CST ); proc->io.calls.flush       = 0;
-			__atomic_fetch_add( &cltr->io.calls.submitted  , proc->io.calls.submitted  , __ATOMIC_SEQ_CST ); proc->io.calls.submitted   = 0;
-			__atomic_fetch_add( &cltr->io.calls.drain      , proc->io.calls.drain      , __ATOMIC_SEQ_CST ); proc->io.calls.drain       = 0;
-			__atomic_fetch_add( &cltr->io.calls.completed  , proc->io.calls.completed  , __ATOMIC_SEQ_CST ); proc->io.calls.completed   = 0;
-			__atomic_fetch_add( &cltr->io.calls.errors.busy, proc->io.calls.errors.busy, __ATOMIC_SEQ_CST ); proc->io.calls.errors.busy = 0;
-			__atomic_fetch_add( &cltr->io.poller.sleeps    , proc->io.poller.sleeps    , __ATOMIC_SEQ_CST ); proc->io.poller.sleeps     = 0;
+			tally_one( &cltr->io.alloc.fast       , &proc->io.alloc.fast        ); // io counters are tallied only when CFA_HAVE_LINUX_IO_URING_H is defined
+			tally_one( &cltr->io.alloc.slow       , &proc->io.alloc.slow        );
+			tally_one( &cltr->io.alloc.fail       , &proc->io.alloc.fail        );
+			tally_one( &cltr->io.alloc.revoke     , &proc->io.alloc.revoke      );
+			tally_one( &cltr->io.alloc.block      , &proc->io.alloc.block       );
+			tally_one( &cltr->io.submit.fast      , &proc->io.submit.fast       );
+			tally_one( &cltr->io.submit.slow      , &proc->io.submit.slow       );
+			tally_one( &cltr->io.flush.external   , &proc->io.flush.external    );
+			tally_one( &cltr->io.calls.flush      , &proc->io.calls.flush       );
+			tally_one( &cltr->io.calls.submitted  , &proc->io.calls.submitted   );
+			tally_one( &cltr->io.calls.drain      , &proc->io.calls.drain       );
+			tally_one( &cltr->io.calls.completed  , &proc->io.calls.completed   );
+			tally_one( &cltr->io.calls.errors.busy, &proc->io.calls.errors.busy );
+			tally_one( &cltr->io.poller.sleeps    , &proc->io.poller.sleeps     );
 		#endif
 	}
