#include <stdio.h>
#include <stdint.h>                                        // uintptr_t

#include "bench.h"
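
// bench.h is not shown here. The guarded fallback below is a minimal,
// hypothetical stand-in (an assumption about the real header, which likely
// derives `times` from argv): BENCH_START() declares `times` and `result`,
// and BENCH( body, result ) times the body with clock_gettime, leaving
// nanoseconds per iteration in `result`.
#ifndef BENCH
#include <time.h>
#define BENCH_START() size_t times = 10000000; double result = 0.0;
#define BENCH( body, result ) { \
        struct timespec _bs, _be; \
        clock_gettime( CLOCK_MONOTONIC, &_bs ); \
        body \
        clock_gettime( CLOCK_MONOTONIC, &_be ); \
        result = ( (_be.tv_sec - _bs.tv_sec) * 1e9 + (_be.tv_nsec - _bs.tv_nsec) ) / (double)times; \
    }
#endif // BENCH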

// Does a "lock xchg" on entry but a simple "mov" on exit => cheaper at zero contention. While the lock has much more
// code than a bare test-and-set, the bulk is never run in the uncontended case.

#define CALIGN __attribute__(( aligned (CACHE_ALIGN) ))
#define CACHE_ALIGN 128
#if defined( __i386 ) || defined( __x86_64 )
    #define Pause() __asm__ __volatile__ ( "pause" : : : )
#elif defined( __ARM_ARCH )
    #define Pause() __asm__ __volatile__ ( "YIELD" : : : )
#else
    #error unsupported architecture
#endif
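// Both are spin-wait hints to the core: "pause" (x86) avoids the memory-order
// mis-speculation flush on loop exit and reduces power while waiting; "yield"
// (ARM) similarly cedes execution resources, e.g. to a sibling SMT thread.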

typedef uintptr_t TYPE;                                    // addressable word-size
static volatile TYPE lock __attribute__(( aligned (128) )); // Intel recommendation
static TYPE PAD CALIGN __attribute__(( unused ));          // protect against further false sharing
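// 128 bytes = two 64-byte cache lines: Intel's adjacent-line (spatial)
// prefetcher fetches lines in pairs, hence the recommendation to keep a hot
// synchronization variable a full line pair away from other data; PAD then
// separates `lock` from whatever the linker places after it.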

static inline void spin_lock( volatile TYPE *lock ) {
    enum { SPIN_START = 4, SPIN_END = 64 * 1024, };
    unsigned int spin = SPIN_START;

    for ( unsigned int i = 1;; i += 1 ) {
      if ( *lock == 0 && __atomic_test_and_set( lock, __ATOMIC_ACQUIRE ) == 0 ) break;
        for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin
        //spin += spin;                                    // powers of 2
        if ( i % 64 == 0 ) spin += spin;                   // slowly increase by powers of 2
        if ( spin > SPIN_END ) spin = SPIN_START;          // prevent overflow
    } // for
} // spin_lock
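// Test-and-test-and-set: the plain `*lock == 0` read spins in the local cache,
// and the atomic exchange (a byte-wide "lock xchg" under GCC's
// __atomic_test_and_set) is attempted only when the lock appears free, so
// waiters do not hammer the interconnect with failed read-modify-write cycles.
// The backoff wraps to SPIN_START once it exceeds SPIN_END, bounding the gap
// between acquisition attempts.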

static inline void spin_unlock( volatile TYPE *lock ) {
    __atomic_clear( lock, __ATOMIC_RELEASE );
} // spin_unlock
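// On x86 (TSO) this release store compiles to the plain "mov" noted in the
// header comment; no "lock"-prefixed instruction is needed on the exit path.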

void __attribute__((noinline)) do_call() {
    spin_lock( &lock );
//  asm volatile ("");
    spin_unlock( &lock );
}
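// noinline keeps a real call in the measured path and stops the compiler from
// inlining and then collapsing the back-to-back lock/unlock pair; the
// commented-out empty asm would act as a compiler barrier holding the critical
// section open.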

int main( int argc, char * argv[] ) {
    BENCH_START()
    BENCH(
        for (size_t i = 0; i < times; i++) {
            do_call();
        },
        result
    )
    printf( "%g\n", result );
}
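// Build and run (hypothetical file name; assumes bench.h is on the include path):
//   gcc -O2 -o spin spin.c
//   ./spin                                                // prints the benchmark result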

// Local Variables: //
// tab-width: 4 //
// End: //