Changeset bf8b77e
- Timestamp: Mar 3, 2022, 1:37:31 PM (3 years ago)
- Branches: ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
- Children: 40a606d2, ba897d21
- Parents: 9c5aef9 (diff), b0d0285 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Files: 4 added, 11 edited
Legend:
- Unmodified
- Added
- Removed
-
benchmark/io/http/protocol.cfa
r9c5aef9 rbf8b77e 173 173 } 174 174 175 static void zero_sqe(struct io_uring_sqe * sqe) {176 sqe->flags = 0;177 sqe->ioprio = 0;178 sqe->fd = 0;179 sqe->off = 0;180 sqe->addr = 0;181 sqe->len = 0;182 sqe->fsync_flags = 0;183 sqe->__pad2[0] = 0;184 sqe->__pad2[1] = 0;185 sqe->__pad2[2] = 0;186 sqe->fd = 0;187 sqe->off = 0;188 sqe->addr = 0;189 sqe->len = 0;190 }191 192 175 enum FSM_STATE { 193 176 Initial, -
doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
r9c5aef9 rbf8b77e 216 216 \paragraph{Relevant Knobs} 217 217 *** FIX ME: Insert Relevant Knobs 218 219 220 221 \section{Existing Memory Allocators}222 With dynamic allocation being an important feature of C, there are many stand-alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators.223 224 \paragraph{dlmalloc}225 dlmalloc (FIX ME: cite allocator) is a thread-safe allocator that is single threaded and single heap. dlmalloc maintains free-lists of different sizes to store freed dynamic memory. (FIX ME: cite wasik)226 227 \paragraph{hoard}228 Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and using a heap layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. (FIX ME: cite wasik)229 230 \paragraph{jemalloc}231 jemalloc (FIX ME: cite allocator) is a thread-safe allocator that uses multiple arenas. Each thread is assigned an arena. Each arena has chunks that contain contagious memory regions of same size. An arena has multiple chunks that contain regions of multiple sizes.232 233 \paragraph{ptmalloc}234 ptmalloc (FIX ME: cite allocator) is a modification of dlmalloc. It is a thread-safe multi-threaded memory allocator that uses multiple heaps. ptmalloc heap has similar design to dlmalloc's heap.235 236 \paragraph{rpmalloc}237 rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses per-thread heap. Each heap has multiple size-classes and each size-class contains memory regions of the relevant size.238 239 \paragraph{tbb malloc}240 tbb malloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses private heap for each thread. Each private-heap has multiple bins of different sizes. Each bin contains free regions of the same size.241 242 \paragraph{tc malloc}243 tcmalloc (FIX ME: cite allocator) is a thread-safe allocator. 
It uses per-thread cache to store free objects that prevents contention on shared resources in multi-threaded application. A central free-list is used to refill per-thread cache when it gets empty. -
doc/theses/mubeen_zulfiqar_MMath/performance.tex
r9c5aef9 rbf8b77e 18 18 \noindent 19 19 ==================== 20 21 \section{Machine Specification} 22 23 The performance experiments were run on three different multicore systems to determine if there is consistency across platforms: 24 \begin{itemize} 25 \item 26 AMD EPYC 7662, 64-core socket $\times$ 2, 2.0 GHz 27 \item 28 Huawei ARM TaiShan 2280 V2 Kunpeng 920, 24-core socket $\times$ 4, 2.6 GHz 29 \item 30 Intel Xeon Gold 5220R, 48-core socket $\times$ 2, 2.20GHz 31 \end{itemize} 32 33 34 \section{Existing Memory Allocators} 35 With dynamic allocation being an important feature of C, there are many stand-alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators. 36 37 \paragraph{dlmalloc} 38 dlmalloc (FIX ME: cite allocator) is a thread-safe allocator that is single threaded and single heap. dlmalloc maintains free-lists of different sizes to store freed dynamic memory. (FIX ME: cite wasik) 39 40 \paragraph{hoard} 41 Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and using a heap layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. (FIX ME: cite wasik) 42 43 \paragraph{jemalloc} 44 jemalloc (FIX ME: cite allocator) is a thread-safe allocator that uses multiple arenas. Each thread is assigned an arena. Each arena has chunks that contain contagious memory regions of same size. An arena has multiple chunks that contain regions of multiple sizes. 45 46 \paragraph{ptmalloc} 47 ptmalloc (FIX ME: cite allocator) is a modification of dlmalloc. It is a thread-safe multi-threaded memory allocator that uses multiple heaps. ptmalloc heap has similar design to dlmalloc's heap. 48 49 \paragraph{rpmalloc} 50 rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses per-thread heap. 
Each heap has multiple size-classes and each size-class contains memory regions of the relevant size. 51 52 \paragraph{tbb malloc} 53 tbb malloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses private heap for each thread. Each private-heap has multiple bins of different sizes. Each bin contains free regions of the same size. 54 55 \paragraph{tc malloc} 56 tcmalloc (FIX ME: cite allocator) is a thread-safe allocator. It uses per-thread cache to store free objects that prevents contention on shared resources in multi-threaded application. A central free-list is used to refill per-thread cache when it gets empty. 57 20 58 21 59 \section{Memory Allocators} -
libcfa/src/concurrency/io.cfa
r9c5aef9 rbf8b77e 287 287 //============================================================================================= 288 288 // submission 289 static inline void __submit ( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {289 static inline void __submit_only( struct $io_context * ctx, __u32 idxs[], __u32 have) { 290 290 // We can proceed to the fast path 291 291 // Get the right objects … … 306 306 ctx->proc->io.pending = true; 307 307 ctx->proc->io.dirty = true; 308 } 309 310 static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) { 311 __sub_ring_t & sq = ctx->sq; 312 __submit_only(ctx, idxs, have); 313 308 314 if(sq.to_submit > 30) { 309 315 __tls_stats()->io.flush.full++; … … 402 408 // I/O Arbiter 403 409 //============================================================================================= 404 static inline void block(__outstanding_io_queue & queue, __outstanding_io & item) { 410 static inline bool enqueue(__outstanding_io_queue & queue, __outstanding_io & item) { 411 bool was_empty; 412 405 413 // Lock the list, it's not thread safe 406 414 lock( queue.lock __cfaabi_dbg_ctx2 ); 407 415 { 416 was_empty = empty(queue.queue); 417 408 418 // Add our request to the list 409 419 add( queue.queue, item ); … … 414 424 unlock( queue.lock ); 415 425 416 wait( item.sem );426 return was_empty; 417 427 } 418 428 … … 432 442 pa.want = want; 433 443 434 block(this.pending, (__outstanding_io&)pa); 444 enqueue(this.pending, (__outstanding_io&)pa); 445 446 wait( pa.sem ); 435 447 436 448 return pa.ctx; … … 485 497 ei.lazy = lazy; 486 498 487 block(ctx->ext_sq, (__outstanding_io&)ei); 499 bool we = enqueue(ctx->ext_sq, (__outstanding_io&)ei); 500 501 ctx->proc->io.pending = true; 502 503 if( we ) { 504 sigval_t value = { PREEMPT_IO }; 505 pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value); 506 } 507 508 wait( ei.sem ); 488 509 489 510 __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", 
have); … … 501 522 __external_io & ei = (__external_io&)drop( ctx.ext_sq.queue ); 502 523 503 __submit (&ctx, ei.idxs, ei.have, ei.lazy);524 __submit_only(&ctx, ei.idxs, ei.have); 504 525 505 526 post( ei.sem ); -
libcfa/src/concurrency/io/setup.cfa
r9c5aef9 rbf8b77e 56 56 57 57 #include "bitmanip.hfa" 58 #include "fstream.hfa" 58 59 #include "kernel_private.hfa" 59 60 #include "thread.hfa" … … 258 259 struct __sub_ring_t & sq = this.sq; 259 260 struct __cmp_ring_t & cq = this.cq; 261 { 262 __u32 fhead = sq.free_ring.head; 263 __u32 ftail = sq.free_ring.tail; 264 265 __u32 total = *sq.num; 266 __u32 avail = ftail - fhead; 267 268 if(avail != total) abort | "Processor (" | (void*)this.proc | ") tearing down ring with" | (total - avail) | "entries allocated but not submitted, out of" | total; 269 } 260 270 261 271 // unmap the submit queue entries -
libcfa/src/concurrency/iofwd.hfa
r9c5aef9 rbf8b77e 19 19 extern "C" { 20 20 #include <asm/types.h> 21 #include <sys/stat.h> // needed for mode_t 21 22 #if CFA_HAVE_LINUX_IO_URING_H 22 23 #include <linux/io_uring.h> … … 133 134 // Check if a function is blocks a only the user thread 134 135 bool has_user_level_blocking( fptr_t func ); 136 137 #if CFA_HAVE_LINUX_IO_URING_H 138 static inline void zero_sqe(struct io_uring_sqe * sqe) { 139 sqe->flags = 0; 140 sqe->ioprio = 0; 141 sqe->fd = 0; 142 sqe->off = 0; 143 sqe->addr = 0; 144 sqe->len = 0; 145 sqe->fsync_flags = 0; 146 sqe->__pad2[0] = 0; 147 sqe->__pad2[1] = 0; 148 sqe->__pad2[2] = 0; 149 sqe->fd = 0; 150 sqe->off = 0; 151 sqe->addr = 0; 152 sqe->len = 0; 153 } 154 #endif -
libcfa/src/concurrency/kernel/fwd.hfa
r9c5aef9 rbf8b77e 347 347 struct oneshot * want = expected == 0p ? 1p : 2p; 348 348 if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { 349 if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p);return 0p; }349 if( expected == 0p ) { return 0p; } 350 350 thread$ * ret = post( *expected, do_unpark ); 351 351 __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST); -
libcfa/src/concurrency/kernel_private.hfa
r9c5aef9 rbf8b77e 60 60 extern bool __preemption_enabled(); 61 61 62 enum { 63 PREEMPT_NORMAL = 0, 64 PREEMPT_TERMINATE = 1, 65 PREEMPT_IO = 2, 66 }; 67 62 68 static inline void __disable_interrupts_checked() { 63 69 /* paranoid */ verify( __preemption_enabled() ); -
libcfa/src/concurrency/preemption.cfa
r9c5aef9 rbf8b77e 96 96 lock{}; 97 97 } 98 99 enum {100 PREEMPT_NORMAL = 0,101 PREEMPT_TERMINATE = 1,102 };103 98 104 99 //============================================================================================= … … 664 659 choose(sfp->si_value.sival_int) { 665 660 case PREEMPT_NORMAL : ;// Normal case, nothing to do here 661 case PREEMPT_IO : ;// I/O asked to stop spinning, nothing to do here 666 662 case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) ); 667 663 default: -
src/Concurrency/Keywords.cc
r9c5aef9 rbf8b77e 422 422 ; 423 423 else if ( auto param = isMainFor( decl, cast_target ) ) { 424 // This should never trigger. 425 assert( vtable_decl ); 424 if ( !vtable_decl ) { 425 SemanticError( decl, context_error ); 426 } 426 427 // Should be safe because of isMainFor. 427 428 StructInstType * struct_type = static_cast<StructInstType *>( -
tests/io/many_read.cfa
r9c5aef9 rbf8b77e 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // many_read.cfa -- Make sure that multiple concurrent reads tomess up.7 // many_read.cfa -- Make sure that multiple concurrent reads don't mess up. 8 8 // 9 9 // Author : Thierry Delisle
Note: See TracChangeset for help on using the changeset viewer.