#include <cassert>

#include <atomic>
#include <cstdint>
#include <new>
#include <type_traits>

struct processor;

struct __attribute__((aligned(64))) processor_id {
	std::atomic<processor *> handle;
	std::atomic<bool> lock;

	processor_id() = default;
	processor_id(processor * proc) : handle(proc), lock() {
		/*paranoid*/ assert(std::atomic_is_lock_free(&lock));
	}
};

extern unsigned num();

#define ERROR throw 1

class processor_list {
private:

	static constexpr std::size_t cache_line_size = 64;

	static_assert(sizeof (processor_id) <= cache_line_size, "ERROR: Instances must fit in one cache line" );
	static_assert(alignof(processor_id) == cache_line_size, "ERROR: Instances must be aligned to one cache line" );

	const unsigned max;     // total cache lines allocated
	std::atomic_uint alloc; // cache lines currently in use
	std::atomic_uint ready; // cache lines ready to iterate over (!= alloc when a thread is in the second half of doregister)
	std::atomic<bool> lock; // writer lock
	processor_id * data;    // data pointer

private:
	// Test-and-test-and-set spin lock acquisition: try to grab the lock with an
	// exchange, and on failure spin on plain loads (with `pause`) until it looks
	// free before trying again.
	inline void acquire(std::atomic<bool> & ll) {
		while( __builtin_expect(ll.exchange(true), false) ) {
			while(ll.load(std::memory_order_relaxed))
				asm volatile("pause");
		}
		/* paranoid */ assert(ll);
	}

public:
	processor_list()
		: max(num())
		, alloc(0)
		, ready(0)
		, lock{false}
		, data( new processor_id[max] )
	{
		/*paranoid*/ assert(num() == max);
		/*paranoid*/ assert(std::atomic_is_lock_free(&alloc));
		/*paranoid*/ assert(std::atomic_is_lock_free(&ready));
	}

	~processor_list() {
		delete[] data;
	}

	//=======================================================================
	// Lock-free registering/unregistering of threads
	unsigned doregister(processor * proc) {
		// Step 1 : check if there is already space in the data
		uint_fast32_t s = ready;

		// Check among all the ready slots
		for(uint_fast32_t i = 0; i < s; i++) {
			processor * null = nullptr; // Re-set every iteration since compare_exchange clobbers it on failure
			if( data[i].handle.load(std::memory_order_relaxed) == null
					&& data[i].handle.compare_exchange_strong(null, proc)) {
				/*paranoid*/ assert(i < ready);
				/*paranoid*/ assert(alignof(decltype(data[i])) == cache_line_size);
				/*paranoid*/ assert((uintptr_t(&data[i]) % cache_line_size) == 0);
				return i;
			}
		}

		if(max <= alloc) ERROR;

		// Step 2 : fetch-and-add to get a new spot in the array.
		uint_fast32_t n = alloc++;
		if(max <= n) ERROR;

		// Step 3 : mark the spot as used, then publish it.
		void * storage = &data[n];
		new (storage) processor_id( proc );
		while(true) {
			unsigned copy = n;
			if( ready.load(std::memory_order_relaxed) == n
					&& ready.compare_exchange_weak(copy, n + 1) )
				break;
			asm volatile("pause");
		}

		// Return the new spot.
		/*paranoid*/ assert(n < ready);
		/*paranoid*/ assert(alignof(decltype(data[n])) == cache_line_size);
		/*paranoid*/ assert((uintptr_t(&data[n]) % cache_line_size) == 0);
		return n;
	}

	processor * unregister(unsigned iproc) {
		/*paranoid*/ assert(iproc < ready);
		auto ret = data[iproc].handle.load(std::memory_order_relaxed);
		data[iproc].handle = nullptr;
		return ret;
	}

	// Reset all registrations.
	// Unsafe in most cases, use for testing only.
	void reset() {
		alloc = 0;
		ready = 0;
	}

	processor * get(unsigned iproc) {
		return data[iproc].handle.load(std::memory_order_relaxed);
	}

	//=======================================================================
	// Reader-writer lock implementation
	// Concurrent with doregister/unregister,
	// i.e., threads can be added at any point, during or between entry/exit
	// of the critical section.

	//-----------------------------------------------------------------------
	// Reader side
	void read_lock(unsigned iproc) {
		/*paranoid*/ assert(iproc < ready);

		// Step 1 : make sure no writer is in the middle of the critical section
		while(lock.load(std::memory_order_relaxed))
			asm volatile("pause");

		// Fence needed because we don't want to start trying to acquire the lock
		// before we read a false.
		// Not needed on x86.
		// std::atomic_thread_fence(std::memory_order_seq_cst);

		// Step 2 : acquire our local lock
		acquire( data[iproc].lock );
		/*paranoid*/ assert(data[iproc].lock);
	}

	void read_unlock(unsigned iproc) {
		/*paranoid*/ assert(iproc < ready);
		/*paranoid*/ assert(data[iproc].lock);
		data[iproc].lock.store(false, std::memory_order_release);
	}

	//-----------------------------------------------------------------------
	// Writer side
	uint_fast32_t write_lock() {
		// Step 1 : lock the global lock
		// Needed so that processors registering mid critical-section
		// cannot simply lock their own lock and enter.
		acquire(lock);

		// Step 2 : lock the per-processor locks
		// Processors that are currently being registered aren't counted
		// but can't be in read_lock or in the critical section.
		// All other processors are counted.
		uint_fast32_t s = ready;
		for(uint_fast32_t i = 0; i < s; i++) {
			acquire( data[i].lock );
		}

		return s;
	}


	void write_unlock(uint_fast32_t last_s) {
		// Step 1 : release the local locks
		// This must be done while the global lock is held, so that threads
		// created mid critical-section cannot race to lock their local locks
		// and have the writer immediately unlock them.
		// last_s is the count returned by write_lock, so only the locks this
		// writer actually acquired are released.
		for(uint_fast32_t i = 0; i < last_s; i++) {
			assert(data[i].lock);
			data[i].lock.store(false, std::memory_order_release);
		}

		// Step 2 : release the global lock
		/*paranoid*/ assert(true == lock);
		lock.store(false, std::memory_order_release);
	}

	//-----------------------------------------------------------------------
	// Checking support
	uint_fast32_t epoch_check() {
		// Step 1 : wait until the global lock is free
		// (mirrors write_lock, but only observes the locks instead of acquiring them)
		while(lock.load(std::memory_order_relaxed))
			asm volatile("pause");

		// Step 2 : wait until every per-processor lock is free
		// Processors that are currently being registered aren't counted
		// but can't be in read_lock or in the critical section.
		// All other processors are counted.
		uint_fast32_t s = ready;
		for(uint_fast32_t i = 0; i < s; i++) {
			while(data[i].lock.load(std::memory_order_relaxed))
				asm volatile("pause");
		}

		return s;
	}

public:
};

#undef ERROR
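
//=======================================================================
// Usage sketch (not part of the original listing): a minimal, hypothetical
// test harness showing the intended calling pattern of processor_list.
// The definitions of `processor` and `num()` below, and the guard macro
// PROCESSOR_LIST_USAGE_SKETCH, are assumptions made purely for illustration;
// the real project supplies its own. Enable by defining the guard macro.
#ifdef PROCESSOR_LIST_USAGE_SKETCH
#include <thread>
#include <vector>

struct processor { unsigned id; };        // assumed stand-in for the real type
unsigned num() { return 4; }              // assumed fixed capacity for the sketch

int main() {
	processor_list list;
	std::vector<std::thread> readers;

	// Reader threads: register once, bracket the read-side critical section
	// with read_lock/read_unlock, then unregister.
	for(unsigned t = 0; t < num(); t++) {
		readers.emplace_back([&list, t]() {
			processor proc{t};
			unsigned id = list.doregister(&proc);  // lock-free registration

			list.read_lock(id);
			// ... read-side critical section ...
			list.read_unlock(id);

			list.unregister(id);
		});
	}

	// Writer: excludes all readers registered so far, then releases them
	// by passing back the count returned by write_lock.
	uint_fast32_t s = list.write_lock();
	// ... write-side critical section over slots [0, s) ...
	list.write_unlock(s);

	for(auto & r : readers) r.join();
	return 0;
}
#endif // PROCESSOR_LIST_USAGE_SKETCH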