#include #include // uintptr_t #include "bench.h" // Does a "lock xchg" on entry but a simple "mov" on exit => cheaper as 0 contention. While it has much more code, the // bulk is never run. #define CALIGN __attribute__(( aligned (CACHE_ALIGN) )) #define CACHE_ALIGN 128 #if defined( __i386 ) || defined( __x86_64 ) #define Pause() __asm__ __volatile__ ( "pause" : : : ) #elif defined( __ARM_ARCH ) #define Pause() __asm__ __volatile__ ( "YIELD" : : : ) #else #error unsupported architecture #endif typedef uintptr_t TYPE; // addressable word-size static volatile TYPE lock __attribute__(( aligned (128) )); // Intel recommendation static TYPE PAD CALIGN __attribute__(( unused )); // protect further false sharing static inline void spin_lock( volatile TYPE *lock ) { enum { SPIN_START = 4, SPIN_END = 64 * 1024, }; unsigned int spin = SPIN_START; for ( unsigned int i = 1;; i += 1 ) { if ( *lock == 0 && __atomic_test_and_set( lock, __ATOMIC_ACQUIRE ) == 0 ) break; for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin //spin += spin; // powers of 2 if ( i % 64 == 0 ) spin += spin; // slowly increase by powers of 2 if ( spin > SPIN_END ) spin = SPIN_START; // prevent overflow } // for } // spin_lock static inline void spin_unlock( volatile TYPE *lock ) { __atomic_clear( lock, __ATOMIC_RELEASE ); } // spin_unlock void __attribute__((noinline)) do_call() { spin_lock( &lock ); // asm volatile (""); spin_unlock( &lock ); } int main( int argc, char * argv[] ) { BENCH_START() BENCH( for (size_t i = 0; i < times; i++) { do_call(); }, result ) printf( "%g\n", result ); } // Local Variables: // // tab-width: 4 // // End: //