1 | #include <stdio.h>
|
---|
2 | #include <stdint.h> // uintptr_t
|
---|
3 |
|
---|
4 | #include "bench.h"
|
---|
5 |
|
---|
// Does a "lock xchg" on entry but a simple "mov" on exit => cheaper at zero contention. While it has much more code,
// the bulk is never run.
8 |
|
---|
// Alignment, in bytes, applied to shared data, and an attribute macro that applies it to a declaration.
#define CACHE_ALIGN 128
#define CALIGN __attribute__(( aligned (CACHE_ALIGN) ))

// Pause(): one architecture-specific spin-wait hint instruction, issued inside busy-wait loops.
#if defined( __ARM_ARCH )
	#define Pause() __asm__ __volatile__ ( "YIELD" : : : )
#elif defined( __i386 ) || defined( __x86_64 )
	#define Pause() __asm__ __volatile__ ( "pause" : : : )
#else
	#error unsupported architecture
#endif
18 |
|
---|
19 | typedef uintptr_t TYPE; // addressable word-size
|
---|
20 | static volatile TYPE lock __attribute__(( aligned (128) )); // Intel recommendation
|
---|
21 | static TYPE PAD CALIGN __attribute__(( unused )); // protect further false sharing
|
---|
22 |
|
---|
23 | static inline void spin_lock( volatile TYPE *lock ) {
|
---|
24 | enum { SPIN_START = 4, SPIN_END = 64 * 1024, };
|
---|
25 | unsigned int spin = SPIN_START;
|
---|
26 |
|
---|
27 | for ( unsigned int i = 1;; i += 1 ) {
|
---|
28 | if ( *lock == 0 && __atomic_test_and_set( lock, __ATOMIC_ACQUIRE ) == 0 ) break;
|
---|
29 | for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin
|
---|
30 | //spin += spin; // powers of 2
|
---|
31 | if ( i % 64 == 0 ) spin += spin; // slowly increase by powers of 2
|
---|
32 | if ( spin > SPIN_END ) spin = SPIN_START; // prevent overflow
|
---|
33 | } // for
|
---|
34 | } // spin_lock
|
---|
35 |
|
---|
36 | static inline void spin_unlock( volatile TYPE *lock ) {
|
---|
37 | __atomic_clear( lock, __ATOMIC_RELEASE );
|
---|
38 | } // spin_unlock
|
---|
39 |
|
---|
40 | void __attribute__((noinline)) do_call() {
|
---|
41 | spin_lock( &lock );
|
---|
42 | // asm volatile ("");
|
---|
43 | spin_unlock( &lock );
|
---|
44 | }
|
---|
45 |
|
---|
46 | int main( int argc, char * argv[] ) {
|
---|
47 | BENCH_START()
|
---|
48 | BENCH(
|
---|
49 | for (size_t i = 0; i < times; i++) {
|
---|
50 | do_call();
|
---|
51 | },
|
---|
52 | result
|
---|
53 | )
|
---|
54 | printf( "%g\n", result );
|
---|
55 | }
|
---|
56 |
|
---|
57 | // Local Variables: //
|
---|
58 | // tab-width: 4 //
|
---|
59 | // End: //
|
---|