Changeset bf8b77e


Ignore:
Timestamp:
Mar 3, 2022, 1:37:31 PM (2 years ago)
Author:
m3zulfiq <m3zulfiq@…>
Branches:
ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
Children:
40a606d2, ba897d21
Parents:
9c5aef9 (diff), b0d0285 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Files:
4 added
11 edited

Legend:

Unmodified
Added
Removed
  • benchmark/io/http/protocol.cfa

    r9c5aef9 rbf8b77e  
    173173}
    174174
    175 static void zero_sqe(struct io_uring_sqe * sqe) {
    176         sqe->flags = 0;
    177         sqe->ioprio = 0;
    178         sqe->fd = 0;
    179         sqe->off = 0;
    180         sqe->addr = 0;
    181         sqe->len = 0;
    182         sqe->fsync_flags = 0;
    183         sqe->__pad2[0] = 0;
    184         sqe->__pad2[1] = 0;
    185         sqe->__pad2[2] = 0;
    186         sqe->fd = 0;
    187         sqe->off = 0;
    188         sqe->addr = 0;
    189         sqe->len = 0;
    190 }
    191 
    192175enum FSM_STATE {
    193176        Initial,
  • doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex

    r9c5aef9 rbf8b77e  
    216216\paragraph{Relevant Knobs}
    217217*** FIX ME: Insert Relevant Knobs
    218 
    219 
    220 
    221 \section{Existing Memory Allocators}
    222 With dynamic allocation being an important feature of C, there are many stand-alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators.
    223 
    224 \paragraph{dlmalloc}
    225 dlmalloc (FIX ME: cite allocator) is a thread-safe allocator that is single threaded and single heap. dlmalloc maintains free-lists of different sizes to store freed dynamic memory. (FIX ME: cite wasik)
    226 
    227 \paragraph{hoard}
    228 Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and using a heap layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. (FIX ME: cite wasik)
    229 
    230 \paragraph{jemalloc}
    231 jemalloc (FIX ME: cite allocator) is a thread-safe allocator that uses multiple arenas. Each thread is assigned an arena. Each arena has chunks that contain contiguous memory regions of the same size. An arena has multiple chunks that contain regions of multiple sizes.
    232 
    233 \paragraph{ptmalloc}
    234 ptmalloc (FIX ME: cite allocator) is a modification of dlmalloc. It is a thread-safe multi-threaded memory allocator that uses multiple heaps. ptmalloc heap has similar design to dlmalloc's heap.
    235 
    236 \paragraph{rpmalloc}
    237 rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses per-thread heap. Each heap has multiple size-classes and each size-class contains memory regions of the relevant size.
    238 
    239 \paragraph{tbb malloc}
    240 tbb malloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses private heap for each thread. Each private-heap has multiple bins of different sizes. Each bin contains free regions of the same size.
    241 
    242 \paragraph{tc malloc}
    243 tcmalloc (FIX ME: cite allocator) is a thread-safe allocator. It uses per-thread cache to store free objects that prevents contention on shared resources in multi-threaded application. A central free-list is used to refill per-thread cache when it gets empty.
  • doc/theses/mubeen_zulfiqar_MMath/performance.tex

    r9c5aef9 rbf8b77e  
    1818\noindent
    1919====================
     20
     21\section{Machine Specification}
     22
     23The performance experiments were run on three different multicore systems to determine if there is consistency across platforms:
     24\begin{itemize}
     25\item
     26AMD EPYC 7662, 64-core socket $\times$ 2, 2.0 GHz
     27\item
     28Huawei ARM TaiShan 2280 V2 Kunpeng 920, 24-core socket $\times$ 4, 2.6 GHz
     29\item
     30Intel Xeon Gold 5220R, 48-core socket $\times$ 2, 2.20 GHz
     31\end{itemize}
     32
     33
     34\section{Existing Memory Allocators}
     35With dynamic allocation being an important feature of C, there are many stand-alone memory allocators that have been designed for different purposes. For this thesis, we chose 7 of the most popular and widely used memory allocators.
     36
     37\paragraph{dlmalloc}
     38dlmalloc (FIX ME: cite allocator) is a thread-safe allocator that is single-threaded and uses a single heap. dlmalloc maintains free-lists of different sizes to store freed dynamic memory. (FIX ME: cite wasik)
     39
     40\paragraph{hoard}
     41Hoard (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses a heap-layer framework. It has per-thread heaps that have thread-local free-lists, and a global shared heap. (FIX ME: cite wasik)
     42
     43\paragraph{jemalloc}
     44jemalloc (FIX ME: cite allocator) is a thread-safe allocator that uses multiple arenas. Each thread is assigned an arena. Each arena has chunks that contain contiguous memory regions of the same size. An arena has multiple chunks that contain regions of multiple sizes.
     45
     46\paragraph{ptmalloc}
     47ptmalloc (FIX ME: cite allocator) is a modification of dlmalloc. It is a thread-safe multi-threaded memory allocator that uses multiple heaps. A ptmalloc heap has a similar design to dlmalloc's heap.
     48
     49\paragraph{rpmalloc}
     50rpmalloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses a per-thread heap. Each heap has multiple size-classes and each size-class contains memory regions of the relevant size.
     51
     52\paragraph{tbb malloc}
     53tbb malloc (FIX ME: cite allocator) is a thread-safe allocator that is multi-threaded and uses a private heap for each thread. Each private heap has multiple bins of different sizes. Each bin contains free regions of the same size.
     54
     55\paragraph{tcmalloc}
     56tcmalloc (FIX ME: cite allocator) is a thread-safe allocator. It uses a per-thread cache to store free objects, which prevents contention on shared resources in multi-threaded applications. A central free-list is used to refill a per-thread cache when it becomes empty.
     57
    2058
    2159\section{Memory Allocators}
  • libcfa/src/concurrency/io.cfa

    r9c5aef9 rbf8b77e  
    287287        //=============================================================================================
    288288        // submission
    289         static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
     289        static inline void __submit_only( struct $io_context * ctx, __u32 idxs[], __u32 have) {
    290290                // We can proceed to the fast path
    291291                // Get the right objects
     
    306306                ctx->proc->io.pending = true;
    307307                ctx->proc->io.dirty   = true;
     308        }
     309
     310        static inline void __submit( struct $io_context * ctx, __u32 idxs[], __u32 have, bool lazy) {
     311                __sub_ring_t & sq = ctx->sq;
     312                __submit_only(ctx, idxs, have);
     313
    308314                if(sq.to_submit > 30) {
    309315                        __tls_stats()->io.flush.full++;
     
    402408// I/O Arbiter
    403409//=============================================================================================
    404         static inline void block(__outstanding_io_queue & queue, __outstanding_io & item) {
     410        static inline bool enqueue(__outstanding_io_queue & queue, __outstanding_io & item) {
     411                bool was_empty;
     412
    405413                // Lock the list, it's not thread safe
    406414                lock( queue.lock __cfaabi_dbg_ctx2 );
    407415                {
     416                        was_empty = empty(queue.queue);
     417
    408418                        // Add our request to the list
    409419                        add( queue.queue, item );
     
    414424                unlock( queue.lock );
    415425
    416                 wait( item.sem );
     426                return was_empty;
    417427        }
    418428
     
    432442                pa.want = want;
    433443
    434                 block(this.pending, (__outstanding_io&)pa);
     444                enqueue(this.pending, (__outstanding_io&)pa);
     445
     446                wait( pa.sem );
    435447
    436448                return pa.ctx;
     
    485497                ei.lazy = lazy;
    486498
    487                 block(ctx->ext_sq, (__outstanding_io&)ei);
     499                bool we = enqueue(ctx->ext_sq, (__outstanding_io&)ei);
     500
     501                ctx->proc->io.pending = true;
     502
     503                if( we ) {
     504                        sigval_t value = { PREEMPT_IO };
     505                        pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
     506                }
     507
     508                wait( ei.sem );
    488509
    489510                __cfadbg_print_safe(io, "Kernel I/O : %u submitted from arbiter\n", have);
     
    501522                                        __external_io & ei = (__external_io&)drop( ctx.ext_sq.queue );
    502523
    503                                         __submit(&ctx, ei.idxs, ei.have, ei.lazy);
     524                                        __submit_only(&ctx, ei.idxs, ei.have);
    504525
    505526                                        post( ei.sem );
  • libcfa/src/concurrency/io/setup.cfa

    r9c5aef9 rbf8b77e  
    5656
    5757        #include "bitmanip.hfa"
     58        #include "fstream.hfa"
    5859        #include "kernel_private.hfa"
    5960        #include "thread.hfa"
     
    258259                struct __sub_ring_t & sq = this.sq;
    259260                struct __cmp_ring_t & cq = this.cq;
     261                {
     262                        __u32 fhead = sq.free_ring.head;
     263                        __u32 ftail = sq.free_ring.tail;
     264
     265                        __u32 total = *sq.num;
     266                        __u32 avail = ftail - fhead;
     267
     268                        if(avail != total) abort | "Processor (" | (void*)this.proc | ") tearing down ring with" | (total - avail) | "entries allocated but not submitted, out of" | total;
     269                }
    260270
    261271                // unmap the submit queue entries
  • libcfa/src/concurrency/iofwd.hfa

    r9c5aef9 rbf8b77e  
    1919extern "C" {
    2020        #include <asm/types.h>
     21        #include <sys/stat.h> // needed for mode_t
    2122        #if CFA_HAVE_LINUX_IO_URING_H
    2223                #include <linux/io_uring.h>
     
    133134// Check if a function blocks only the user thread
    134135bool has_user_level_blocking( fptr_t func );
     136
     137#if CFA_HAVE_LINUX_IO_URING_H
     138        static inline void zero_sqe(struct io_uring_sqe * sqe) {
     139                sqe->flags = 0;
     140                sqe->ioprio = 0;
     141                sqe->fd = 0;
     142                sqe->off = 0;
     143                sqe->addr = 0;
     144                sqe->len = 0;
     145                sqe->fsync_flags = 0;
     146                sqe->__pad2[0] = 0;
     147                sqe->__pad2[1] = 0;
     148                sqe->__pad2[2] = 0;
     149                sqe->fd = 0;
     150                sqe->off = 0;
     151                sqe->addr = 0;
     152                sqe->len = 0;
     153        }
     154#endif
  • libcfa/src/concurrency/kernel/fwd.hfa

    r9c5aef9 rbf8b77e  
    347347                                        struct oneshot * want = expected == 0p ? 1p : 2p;
    348348                                        if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
    349                                                 if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return 0p; }
     349                                                if( expected == 0p ) { return 0p; }
    350350                                                thread$ * ret = post( *expected, do_unpark );
    351351                                                __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
  • libcfa/src/concurrency/kernel_private.hfa

    r9c5aef9 rbf8b77e  
    6060extern bool __preemption_enabled();
    6161
     62enum {
     63        PREEMPT_NORMAL    = 0,
     64        PREEMPT_TERMINATE = 1,
     65        PREEMPT_IO = 2,
     66};
     67
    6268static inline void __disable_interrupts_checked() {
    6369        /* paranoid */ verify( __preemption_enabled() );
  • libcfa/src/concurrency/preemption.cfa

    r9c5aef9 rbf8b77e  
    9696        lock{};
    9797}
    98 
    99 enum {
    100         PREEMPT_NORMAL    = 0,
    101         PREEMPT_TERMINATE = 1,
    102 };
    10398
    10499//=============================================================================================
     
    664659        choose(sfp->si_value.sival_int) {
    665660                case PREEMPT_NORMAL   : ;// Normal case, nothing to do here
     661                case PREEMPT_IO       : ;// I/O asked to stop spinning, nothing to do here
    666662                case PREEMPT_TERMINATE: verify( __atomic_load_n( &__cfaabi_tls.this_processor->do_terminate, __ATOMIC_SEQ_CST ) );
    667663                default:
  • src/Concurrency/Keywords.cc

    r9c5aef9 rbf8b77e  
    422422                        ;
    423423                else if ( auto param = isMainFor( decl, cast_target ) ) {
    424                         // This should never trigger.
    425                         assert( vtable_decl );
     424                        if ( !vtable_decl ) {
     425                                SemanticError( decl, context_error );
     426                        }
    426427                        // Should be safe because of isMainFor.
    427428                        StructInstType * struct_type = static_cast<StructInstType *>(
  • tests/io/many_read.cfa

    r9c5aef9 rbf8b77e  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // many_read.cfa -- Make sure that multiple concurrent reads to mess up.
     7// many_read.cfa -- Make sure that multiple concurrent reads don't mess up.
    88//
    99// Author           : Thierry Delisle
Note: See TracChangeset for help on using the changeset viewer.