Context Navigation

Reverse Diff

Changes in / [6091b88a:d45ed83]

Files:

: 1 added
: 11 edited

benchmark/io/readv.cfa (modified) (6 diffs)
examples/io/simple/server.cfa (modified) (5 diffs)
libcfa/prelude/defines.hfa.in (modified) (1 diff)
libcfa/src/bits/locks.hfa (modified) (2 diffs)
libcfa/src/bits/signal.hfa (modified) (1 diff)
libcfa/src/concurrency/io.cfa (modified) (12 diffs)
libcfa/src/concurrency/iofwd.hfa (added)
libcfa/src/concurrency/kernel.cfa (modified) (23 diffs)
libcfa/src/concurrency/kernel.hfa (modified) (7 diffs)
libcfa/src/concurrency/kernel_private.hfa (modified) (1 diff)
libcfa/src/concurrency/preemption.cfa (modified) (4 diffs)
tools/gdb/utils-gdb.py (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

benchmark/io/readv.cfa

-              r6091b88a
+              rd45ed83
 #include <time.hfa>
+#if !defined(HAVE_LINUX_IO_URING_H)
+#warning no io uring
+#endif
 extern bool traceHeapOn();
 extern ssize_t async_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
 int fd;
 …
 unsigned long int buflen = 50;
+cluster * the_cluster;
 thread Reader {};
+// Constructor for Reader: forwards to the thread constructor of `this`,
+// passing the name "Reader Thread" and the cluster the global the_cluster
+// points to.
+// NOTE(review): dereferences the_cluster, so it presumably has to be set
+// before any Reader is constructed -- confirm against the call site.
+void ?{}( Reader & this ) {
+        ((thread&)this){ "Reader Thread", *the_cluster };
+}
+// Wrapper type holding exactly one processor.
+struct my_processor {
+        // The wrapped processor
+        processor p;
+};
+// Constructor for my_processor: constructs the wrapped processor with the
+// name "I/O Processor" on the cluster the global the_cluster points to.
+// NOTE(review): dereferences the_cluster -- presumably it must be non-null
+// by the time this runs; confirm against the call site.
+void ?{}( my_processor & this ) {
+        (this.p){ "I/O Processor", *the_cluster };
+}
 void main( Reader & ) {
         while(!__atomic_load_n(&run, __ATOMIC_RELAXED)) yield();
 …
         while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
+                async_preadv2(fd, &iov, 1, 0, 0);
+                int r = cfa_preadv2(fd, &iov, 1, 0, 0);
+                if(r < 0) abort(strerror(-r));
                 __atomic_fetch_add( &count, 1, __ATOMIC_SEQ_CST );
+        }
 …
         printf("Setting local\n");
         setlocale(LC_NUMERIC, "");
         arg_loop:
 …
+        }
         int fd = open(__FILE__, 0);
+        fd = open(__FILE__, 0);
         if(fd < 0) {
                 fprintf(stderr, "Could not open source file\n");
 …
         printf("Running %lu threads over %lu processors for %lf seconds\n", nthreads, nprocs, duration);
-        Time start, end;
+        {
+                processor procs[nprocs - 1];
+                Time start, end;
+                cluster cl = { "IO Cluster" };
+                the_cluster = &cl;
+                #if !defined(__CFA_NO_STATISTICS__)
+                        print_stats_at_exit( cl );
+                #endif
+                {
+                        Reader threads[nthreads];
+                        my_processor procs[nprocs];
+                        {
+                                Reader threads[nthreads];
+                        printf("Starting\n");
+                        start = getTime();
+                        run = true;
+                        do {
+                                sleep(500`ms);
+                                printf("Starting\n");
+                                start = getTime();
+                                run = true;
+                                do {
+                                        sleep(500`ms);
+                                        end = getTime();
+                                } while( (end - start) < duration`s );
+                                run = false;
                                 end = getTime();
+                        } while( (end - start) < duration`s );
+                        run = false;
+                        end = getTime();
+                                printf("Done\n");
+                        }
+                }
+                printf("Took %ld ms\n", (end - start)`ms);
+                printf("Total reads:      %'zu\n", count);
+                printf("Reads per second: %'lf\n", ((double)count) / (end - start)`s);
+        }
-        printf("Took %ld ms\n", (end - start)`ms);
-        printf("Total reads:      %'zu\n", count);
-        printf("Reads per second: %'lf\n", ((double)count) / (end - start)`s);
         close(fd);
-        printf("Done\n");
+}

examples/io/simple/server.cfa

-              r6091b88a
+              rd45ed83
 //----------
 extern ssize_t async_recvmsg(int sockfd, struct msghdr *msg, int flags);
 extern int async_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
 extern int async_close(int fd);
+extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags);
+extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
+extern int cfa_close(int fd);
 //----------
 …
         struct sockaddr_in cli_addr;
         __socklen_t clilen = sizeof(cli_addr);
         int newsock = async_accept4(sock, (struct sockaddr *) &cli_addr, &clilen, 0);
+        int newsock = cfa_accept4(sock, (struct sockaddr *) &cli_addr, &clilen, 0);
         if (newsock < 0) {
                 error( printer, "accept", -newsock);
 …
         while(1) {
                 int res = async_recvmsg(newsock, &msg, 0);
+                int res = cfa_recvmsg(newsock, &msg, 0);
                 if(res == 0) break;
                 if(res < 0) {
 …
+        }
         ret = async_close(newsock);
+        ret = cfa_close(newsock);
       if(ret < 0) {
             error( printer, "close new", -ret);
 …
+      }
         ret = async_close(sock);
+        ret = cfa_close(sock);
       if(ret < 0) {
             error( printer, "close old", -ret);

libcfa/prelude/defines.hfa.in

-              r6091b88a
+              rd45ed83
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// defines.hfa.in --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Apr 30 15:23:00 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
 #undef HAVE_LINUX_IO_URING_H
+// #define __CFA_IO_POLLING_USER__
+// #define __CFA_IO_POLLING_KERNEL__

libcfa/src/bits/locks.hfa

-              r6091b88a
+              rd45ed83
         struct __bin_sem_t {
-                bool                    signaled;
                 pthread_mutex_t         lock;
                 pthread_cond_t          cond;
+                int                     val;
         };
         static inline void ?{}(__bin_sem_t & this) with( this ) {
-                signaled = false;
                 pthread_mutex_init(&lock, NULL);
                 pthread_cond_init (&cond, NULL);
+                val = 0;
+        }
 …
                 verify(__cfaabi_dbg_in_kernel());
                 pthread_mutex_lock(&lock);
                         if(!signaled) {   // this must be a loop, not if!
+                        while(val < 1) {
                                 pthread_cond_wait(&cond, &lock);
+                        }
                         signaled = false;
+                        val -= 1;
                 pthread_mutex_unlock(&lock);
+        }
         static inline bool post(__bin_sem_t & this) with( this ) {
+                bool needs_signal = false;
                 pthread_mutex_lock(&lock);
+                        bool needs_signal = !signaled;
+                        signaled = true;
+                        if(val < 1) {
+                                val += 1;
+                                pthread_cond_signal(&cond);
+                                needs_signal = true;
+                        }
                 pthread_mutex_unlock(&lock);
-                if (needs_signal) pthread_cond_signal(&cond);
                 return needs_signal;

libcfa/src/bits/signal.hfa

r6091b88a	rd45ed83
54	54	sig, handler, flags, errno, strerror( errno )
55	55	);
56		_exit( EXIT_FAILURE );
	56	_Exit( EXIT_FAILURE );
57	57	} // if
58	58	}

libcfa/src/concurrency/io.cfa

-              r6091b88a
+              rd45ed83
+//
+// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// io.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Apr 23 17:31:00 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+// #define __CFA_DEBUG_PRINT_IO__
 #include "kernel.hfa"
 #if !defined(HAVE_LINUX_IO_URING_H)
         void __kernel_io_startup( cluster & this ) {
+        void __kernel_io_startup( cluster & ) {
                 // Nothing to do without io_uring
+        }
         void __kernel_io_shutdown( cluster & this ) {
+        void __kernel_io_start_thrd( cluster & ) {
                 // Nothing to do without io_uring
+        }
+        bool is_async( void (*)() ) {
+                return false;
+        void __kernel_io_stop_thrd ( cluster & ) {
+                // Nothing to do without io_uring
+        }
+        void __kernel_io_shutdown( cluster & ) {
+                // Nothing to do without io_uring
+        }
 …
+        }
+        static void * __io_poller( void * arg );
+       // Weirdly, some systems that do support io_uring don't actually define these
+       #ifdef __alpha__
+       /*
+       * alpha is the only exception, all other architectures
+       * have common numbers for new system calls.
+       */
+       # ifndef __NR_io_uring_setup
+       #  define __NR_io_uring_setup           535
+       # endif
+       # ifndef __NR_io_uring_enter
+       #  define __NR_io_uring_enter           536
+       # endif
+       # ifndef __NR_io_uring_register
+       #  define __NR_io_uring_register        537
+       # endif
+       #else /* !__alpha__ */
+       # ifndef __NR_io_uring_setup
+       #  define __NR_io_uring_setup           425
+       # endif
+       # ifndef __NR_io_uring_enter
+       #  define __NR_io_uring_enter           426
+       # endif
+       # ifndef __NR_io_uring_register
+       #  define __NR_io_uring_register        427
+       # endif
+       #endif
+        static void * __io_poller_slow( void * arg );
+        // Weirdly, some systems that do support io_uring don't actually define these
+        #ifdef __alpha__
+                /*
+                * alpha is the only exception, all other architectures
+                * have common numbers for new system calls.
+                */
+                #ifndef __NR_io_uring_setup
+                        #define __NR_io_uring_setup           535
+                #endif
+                #ifndef __NR_io_uring_enter
+                        #define __NR_io_uring_enter           536
+                #endif
+                #ifndef __NR_io_uring_register
+                        #define __NR_io_uring_register        537
+                #endif
+        #else /* !__alpha__ */
+                #ifndef __NR_io_uring_setup
+                        #define __NR_io_uring_setup           425
+                #endif
+                #ifndef __NR_io_uring_enter
+                        #define __NR_io_uring_enter           426
+                #endif
+                #ifndef __NR_io_uring_register
+                        #define __NR_io_uring_register        427
+                #endif
+        #endif
+        #if defined(__CFA_IO_POLLING_USER__)
+                // Constructor for the fast poller: remembers the I/O ring of
+                // `cltr` in this.ring and constructs the poller's thread field with
+                // the name "I/O Poller" on that same cluster.
+                void ?{}( __io_poller_fast & this, struct cluster & cltr ) {
+                        // Point at the cluster's ring; the cluster owns it, not the poller
+                        this.ring = &cltr.io;
+                        (this.thrd){ "I/O Poller", cltr };
+                }
+                void ^?{}( __io_poller_fast & mutex this );
+        void main( __io_poller_fast & this );
+        static inline $thread * get_thread( __io_poller_fast & this ) { return &this.thrd; }
+                void ^?{}( __io_poller_fast & mutex this ) {}
+        #endif
 //=============================================================================================
 // I/O Startup / Shutdown logic
 //=============================================================================================
         void __kernel_io_startup( cluster & this ) {
+        void __kernel_io_startup( cluster & this, bool main_cluster ) {
                 // Step 1 : call to setup
                 struct io_uring_params params;
 …
                 // Requires features
+                // // adjust the size according to the parameters
+                // if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
+                //      cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz);
+                // }
+                #if defined(IORING_FEAT_SINGLE_MMAP)
+                        // adjust the size according to the parameters
+                        if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
+                                cq->ring_sz = sq->ring_sz = max(cq->ring_sz, sq->ring_sz);
+                        }
+                #endif
                 // mmap the Submit Queue into existence
 …
+                }
-                // mmap the Completion Queue into existence (may or may not be needed)
                 // Requires features
+                // if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
+                //      cq->ring_ptr = sq->ring_ptr;
+                // }
+                // else {
+                #if defined(IORING_FEAT_SINGLE_MMAP)
+                        // mmap the Completion Queue into existence (may or may not be needed)
+                        if ((params.features & IORING_FEAT_SINGLE_MMAP) != 0) {
+                                cq->ring_ptr = sq->ring_ptr;
+                        }
+                        else
+                #endif
+                {
                         // We need multiple call to MMAP
                         cq.ring_ptr = mmap(0, cq.ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
 …
                                 abort("KERNEL ERROR: IO_URING MMAP2 - %s\n", strerror(errno));
+                        }
                 // }
+                }
                 // mmap the submit queue entries
 …
                 sq.array   = (         uint32_t *)(((intptr_t)sq.ring_ptr) + params.sq_off.array);
                 sq.alloc = *sq.tail;
+                sq.ready = *sq.tail;
                 // completion queue
 …
                 (this.io.submit){ min(*sq.num, *cq.num) };
+                // Initialize statistics
+                #if !defined(__CFA_NO_STATISTICS__)
+                        this.io.submit_q.stats.submit_avg.val = 0;
+                        this.io.submit_q.stats.submit_avg.cnt = 0;
+                        this.io.completion_q.stats.completed_avg.val = 0;
+                        this.io.completion_q.stats.completed_avg.cnt = 0;
+                #endif
+                if(!main_cluster) {
+                        __kernel_io_finish_start( this );
+                }
+        }
+        void __kernel_io_finish_start( cluster & this ) {
+                #if defined(__CFA_IO_POLLING_USER__)
+                        __cfadbg_print_safe(io, "Kernel I/O : Creating fast poller for cluter %p\n", &this);
+                        (this.io.poller.fast){ "Fast IO Poller", this };
+                        __thrd_start( this.io.poller.fast, main );
+                #endif
                 // Create the poller thread
+                this.io.stack = __create_pthread( &this.io.poller, __io_poller, &this );
+        }
+        void __kernel_io_shutdown( cluster & this ) {
+                // Stop the IO Poller
+                __cfadbg_print_safe(io, "Kernel I/O : Creating slow poller for cluter %p\n", &this);
+                this.io.poller.slow.stack = __create_pthread( &this.io.poller.slow.kthrd, __io_poller_slow, &this );
+        }
+        void __kernel_io_prepare_stop( cluster & this ) {
+                __cfadbg_print_safe(io, "Kernel I/O : Stopping pollers for cluster\n", &this);
                 // Notify the poller thread of the shutdown
                 __atomic_store_n(&this.io.done, true, __ATOMIC_SEQ_CST);
+                // Stop the IO Poller
                 sigval val = { 1 };
+                pthread_sigqueue( this.io.poller, SIGUSR1, val );
+                pthread_sigqueue( this.io.poller.slow.kthrd, SIGUSR1, val );
+                #if defined(__CFA_IO_POLLING_USER__)
+                        post( this.io.poller.sem );
+                #endif
                 // Wait for the poller thread to finish
+                pthread_join( this.io.poller, 0p );
+                free( this.io.stack );
+                pthread_join( this.io.poller.slow.kthrd, 0p );
+                free( this.io.poller.slow.stack );
+                __cfadbg_print_safe(io, "Kernel I/O : Slow poller stopped for cluster\n", &this);
+                #if defined(__CFA_IO_POLLING_USER__)
+                        // unpark the fast io_poller
+                        unpark( &this.io.poller.fast.thrd __cfaabi_dbg_ctx2 );
+                        ^(this.io.poller.fast){};
+                        __cfadbg_print_safe(io, "Kernel I/O : Fast poller stopped for cluster\n", &this);
+                #endif
+        }
+        void __kernel_io_shutdown( cluster & this, bool main_cluster ) {
+                if(!main_cluster) {
+                        __kernel_io_prepare_stop( this );
+                }
+                // print statistics
+                #if !defined(__CFA_NO_STATISTICS__)
+                        if(this.print_stats) {
+                                __cfaabi_bits_print_safe( STDERR_FILENO,
+                                        "----- I/O uRing Stats -----\n"
+                                        "- total submit calls  : %llu\n"
+                                        "- avg submit          : %lf\n"
+                                        "- total wait calls    : %llu\n"
+                                        "- avg completion/wait : %lf\n",
+                                        this.io.submit_q.stats.submit_avg.cnt,
+                                        ((double)this.io.submit_q.stats.submit_avg.val) / this.io.submit_q.stats.submit_avg.cnt,
+                                        this.io.completion_q.stats.completed_avg.cnt,
+                                        ((double)this.io.completion_q.stats.completed_avg.val) / this.io.completion_q.stats.completed_avg.cnt
+                                );
+                        }
+                #endif
                 // Shutdown the io rings
 …
         // Process a single completion message from the io_uring
         // This is NOT thread-safe
+        static bool __io_process(struct io_ring & ring) {
+        static int __drain_io( struct io_ring & ring, sigset_t * mask, int waitcnt, bool in_kernel ) {
+                int ret = syscall( __NR_io_uring_enter, ring.fd, 0, waitcnt, IORING_ENTER_GETEVENTS, mask, _NSIG / 8);
+                if( ret < 0 ) {
+                        switch((int)errno) {
+                        case EAGAIN:
+                        case EINTR:
+                                return -EAGAIN;
+                        default:
+                                abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
+                        }
+                }
+                // Drain the queue
                 unsigned head = *ring.completion_q.head;
                 unsigned tail = __atomic_load_n(ring.completion_q.tail, __ATOMIC_ACQUIRE);
+                if (head == tail) return false;
+                unsigned idx = head & (*ring.completion_q.mask);
+                struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
+                /* paranoid */ verify(&cqe);
+                struct io_user_data * data = (struct io_user_data *)cqe.user_data;
+                // __cfaabi_bits_print_safe( STDERR_FILENO, "Performed reading io cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
+                data->result = cqe.res;
+                __unpark( data->thrd __cfaabi_dbg_ctx2 );
+                // Nothing was new return 0
+                if (head == tail) {
+                        #if !defined(__CFA_NO_STATISTICS__)
+                                ring.completion_q.stats.completed_avg.cnt += 1;
+                        #endif
+                        return 0;
+                }
+                uint32_t count = tail - head;
+                for(i; count) {
+                        unsigned idx = (head + i) & (*ring.completion_q.mask);
+                        struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];
+                        /* paranoid */ verify(&cqe);
+                        struct io_user_data * data = (struct io_user_data *)cqe.user_data;
+                        __cfadbg_print_safe( io, "Kernel I/O : Performed reading io cqe %p, result %d for %p\n", data, cqe.res, data->thrd );
+                        data->result = cqe.res;
+                        if(!in_kernel) { unpark( data->thrd __cfaabi_dbg_ctx2 ); }
+                        else         { __unpark( data->thrd __cfaabi_dbg_ctx2 ); }
+                }
                 // Allow new submissions to happen
                 V(ring.submit);
+                V(ring.submit, count);
                 // Mark to the kernel that the cqe has been seen
                 // Ensure that the kernel only sees the new value of the head index after the CQEs have been read.
+                __atomic_fetch_add( ring.completion_q.head, 1, __ATOMIC_RELAXED );
+                return true;
+        }
+        static void * __io_poller( void * arg ) {
+                __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED );
+                // Update statistics
+                #if !defined(__CFA_NO_STATISTICS__)
+                        ring.completion_q.stats.completed_avg.val += count;
+                        ring.completion_q.stats.completed_avg.cnt += 1;
+                #endif
+                return count;
+        }
+        static void * __io_poller_slow( void * arg ) {
                 cluster * cltr = (cluster *)arg;
                 struct io_ring & ring = cltr->io;
 …
                 verify( (*ring.completion_q.head) == (*ring.completion_q.tail) );
+                LOOP: while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
+                        int ret = syscall( __NR_io_uring_enter, ring.fd, 0, 1, IORING_ENTER_GETEVENTS, &mask, _NSIG / 8);
+                        if( ret < 0 ) {
+                                switch((int)errno) {
+                                case EAGAIN:
+                                case EINTR:
+                                        continue LOOP;
+                                default:
+                                        abort( "KERNEL ERROR: IO_URING WAIT - %s\n", strerror(errno) );
+                while(!__atomic_load_n(&ring.done, __ATOMIC_SEQ_CST)) {
+                        #if defined(__CFA_IO_POLLING_USER__)
+                                // In the user-thread approach drain and if anything was drained,
+                                // batton pass to the user-thread
+                                int count = __drain_io( ring, &mask, 1, true );
+                                if(count > 0) {
+                                        __cfadbg_print_safe(io, "Kernel I/O : Moving to ring %p to fast poller\n", &ring);
+                                        __unpark( &ring.poller.fast.thrd __cfaabi_dbg_ctx2 );
+                                        wait( ring.poller.sem );
+                                }
+                        #else
+                                //In the naive approach, just poll the io completion queue directly
+                                __drain_io( ring, &mask, 1, true );
+                        #endif
+                }
+                return 0p;
+        }
+        #if defined(__CFA_IO_POLLING_USER__)
+                // Entry point of the user-level ("fast") poller thread.
+                // The thread starts parked. Each time it runs, it drains the completion
+                // queue without blocking (wait count 0, no signal mask). While a drain
+                // finds completions it yields and polls again; once a drain finds
+                // nothing (or the call was interrupted) it posts the poller semaphore,
+                // which the slow poller waits on, and parks itself again.
+                // The loop ends when the ring's `done` flag is observed set.
+                void main( __io_poller_fast & this ) {
+                        // Start parked
+                        park( __cfaabi_dbg_ctx );
+                        // Then loop until shutdown is requested
+                        while(!__atomic_load_n(&this.ring->done, __ATOMIC_SEQ_CST)) {
+                                // Drain the io; the result is positive only when completions were
+                                // processed (0 = nothing new, negative = EAGAIN/EINTR)
+                                if(0 < __drain_io( *this.ring, 0p, 0, false )) {
+                                        // If we got something, just yield and check again
+                                        yield();
+                                }
+                                else {
+                                        // We didn't get anything, baton pass to the slow poller
+                                        __cfadbg_print_safe(io, "Kernel I/O : Moving to ring %p to slow poller\n", this.ring);
+                                        post( this.ring->poller.sem );
+                                        park( __cfaabi_dbg_ctx );
+                                }
+                        }
+                        // NOTE(review): `ring` is not declared in the visible part of this
+                        // function -- confirm that this final drain belongs here.
+                        // Drain the queue
+                        while(__io_process(ring)) {}
+                }
+                return 0p;
+        }
+                }
+        #endif
 //=============================================================================================
 …
 //
+// Reserve one submission queue entry on `ring`.
+// Returns a tuple: a pointer to the reserved entry and the raw (unmasked)
+// allocation index that was handed out for it.
+static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+        // Wait for a spot to be available
+        P(ring.submit);
+        // Allocate the sqe: atomically take the next value of the allocation counter
+        uint32_t idx = __atomic_fetch_add(&ring.submit_q.alloc, 1ul32, __ATOMIC_SEQ_CST);
+        // Validate that we didn't overflow anything
+        // Check that nothing overflowed
+        // (placeholder: the condition is the constant true, so nothing is checked)
+        /* paranoid */ verify( true );
+        // Check that it goes head -> tail -> alloc and never head -> alloc -> tail
+        // (placeholder: the condition is the constant true, so nothing is checked)
+        /* paranoid */ verify( true );
+        // Return the sqe: the counter is masked to index the sqes array
+        return [&ring.submit_q.sqes[ idx & (*ring.submit_q.mask)], idx];
+}
+// Publish the entry with allocation index `idx` on the submission ring and
+// ask the kernel to consume it. The whole operation runs under
+// ring.submit_q.lock. Any failure of the system call aborts the program.
+static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+        // get mutual exclusion
+        lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+        // Append to the list of ready entries: store the masked index in the
+        // slot at the current tail, then advance the tail
+        uint32_t * tail = ring.submit_q.tail;
+        const uint32_t mask = *ring.submit_q.mask;
+        ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+        __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+        // Enter the kernel asking it to submit one entry, without waiting for
+        // completions and without flags or a signal mask
+        int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+        // __cfaabi_bits_print_safe( STDERR_FILENO, "Performed io_submit, returned %d\n", ret );
+        if( ret < 0 ) {
+                // Every errno value is treated as fatal
+                switch((int)errno) {
+                default:
+                        abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                }
+        }
+        unlock(ring.submit_q.lock);
+        // Make sure that idx was submitted
+        // Be careful to not get false positive if we cycled the entire list or that someone else submitted for us
+        // (no such check is implemented here)
+}
+// Base constructor for a submission queue entry: records the operation code
+// and target descriptor and clears every other field assigned here.
+//   opcode - operation code stored in the entry
+//   fd     - file descriptor stored in the entry
+static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd) {
+        // Caller-supplied fields
+        this.fd = fd;
+        this.opcode = opcode;
+        // Use IOSQE_ASYNC when the headers provide it, otherwise no flags
+        #if defined(IOSQE_ASYNC)
+                this.flags = IOSQE_ASYNC;
+        #else
+                this.flags = 0;
+        #endif
+        // Everything else starts out cleared
+        this.rw_flags = 0;
+        this.len = 0;
+        this.addr = 0;
+        this.off = 0;
+        this.ioprio = 0;
+        this.__pad2[2] = 0;
+        this.__pad2[1] = 0;
+        this.__pad2[0] = 0;
+}
+// Constructor for a submission queue entry that carries a buffer: runs the
+// (opcode, fd) constructor first, then fills in address, length and offset.
+//   addr - pointer stored in the entry as a 64-bit integer
+//   len  - length stored in the entry
+//   off  - offset stored in the entry
+static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd, void * addr, uint32_t len, uint64_t off ) {
+        // Must come first: it resets the fields overwritten below
+        (this){ opcode, fd };
+        this.len = len;
+        this.addr = (uint64_t)addr;
+        this.off = off;
+}
+        // Reserve one submission queue entry on `ring`.
+        // Returns a tuple: a pointer to the reserved entry and the raw (unmasked)
+        // allocation index that was handed out for it.
+        static inline [* struct io_uring_sqe, uint32_t] __submit_alloc( struct io_ring & ring ) {
+                // Wait for a spot to be available
+                P(ring.submit);
+                // Allocate the sqe: atomically take the next value of the allocation counter
+                uint32_t idx = __atomic_fetch_add(&ring.submit_q.alloc, 1ul32, __ATOMIC_SEQ_CST);
+                // Validate that we didn't overflow anything
+                // Check that nothing overflowed
+                // (placeholder: the condition is the constant true, so nothing is checked)
+                /* paranoid */ verify( true );
+                // Check that it goes head -> tail -> alloc and never head -> alloc -> tail
+                // (placeholder: the condition is the constant true, so nothing is checked)
+                /* paranoid */ verify( true );
+                // Return the sqe: the counter is masked to index the sqes array
+                return [&ring.submit_q.sqes[ idx & (*ring.submit_q.mask)], idx];
+        }
+        // Publish the entry with allocation index `idx` on the submission ring and
+        // ask the kernel to consume it. Ring update, system call and statistics
+        // update all run under ring.submit_q.lock. Any failure of the system call
+        // aborts the program.
+        static inline void __submit( struct io_ring & ring, uint32_t idx ) {
+                // get mutual exclusion
+                lock(ring.submit_q.lock __cfaabi_dbg_ctx2);
+                // Append to the list of ready entries: store the masked index in the
+                // slot at the current tail, then advance the tail
+                uint32_t * tail = ring.submit_q.tail;
+                const uint32_t mask = *ring.submit_q.mask;
+                ring.submit_q.array[ (*tail) & mask ] = idx & mask;
+                __atomic_fetch_add(tail, 1ul32, __ATOMIC_SEQ_CST);
+                // Enter the kernel asking it to submit one entry, without waiting for
+                // completions and without flags or a signal mask
+                int ret = syscall( __NR_io_uring_enter, ring.fd, 1, 0, 0, 0p, 0);
+                if( ret < 0 ) {
+                        // Every errno value is treated as fatal
+                        switch((int)errno) {
+                        default:
+                                abort( "KERNEL ERROR: IO_URING SUBMIT - %s\n", strerror(errno) );
+                        }
+                }
+                // update statistics: one more submit call, one more entry submitted
+                #if !defined(__CFA_NO_STATISTICS__)
+                        ring.submit_q.stats.submit_avg.val += 1;
+                        ring.submit_q.stats.submit_avg.cnt += 1;
+                #endif
+                unlock(ring.submit_q.lock);
+                // Make sure that idx was submitted
+                // Be careful to not get false positive if we cycled the entire list or that someone else submitted for us
+                // (no such check is implemented here; only a debug trace is emitted)
+                __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret );
+        }
+        // Base constructor for a submission queue entry: records the operation code
+        // and target descriptor and clears every other field assigned here.
+        //   opcode - operation code stored in the entry
+        //   fd     - file descriptor stored in the entry
+        static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd) {
+                // Caller-supplied fields
+                this.fd = fd;
+                this.opcode = opcode;
+                // Use IOSQE_ASYNC when the headers provide it, otherwise no flags
+                #if defined(IOSQE_ASYNC)
+                        this.flags = IOSQE_ASYNC;
+                #else
+                        this.flags = 0;
+                #endif
+                // Everything else starts out cleared
+                this.rw_flags = 0;
+                this.len = 0;
+                this.addr = 0;
+                this.off = 0;
+                this.ioprio = 0;
+                this.__pad2[2] = 0;
+                this.__pad2[1] = 0;
+                this.__pad2[0] = 0;
+        }
+        // Constructor for a submission queue entry that carries a buffer: runs the
+        // (opcode, fd) constructor first, then fills in address, length and offset.
+        //   addr - pointer stored in the entry as a 64-bit integer
+        //   len  - length stored in the entry
+        //   off  - offset stored in the entry
+        static inline void ?{}(struct io_uring_sqe & this, uint8_t opcode, int fd, void * addr, uint32_t len, uint64_t off ) {
+                // Must come first: it resets the fields overwritten below
+                (this){ opcode, fd };
+                this.len = len;
+                this.addr = (uint64_t)addr;
+                this.off = off;
+        }
 //=============================================================================================
 // I/O Interface
 //=============================================================================================
-        extern "C" {
-                #define __USE_GNU
-                #define _GNU_SOURCE
-                #include <fcntl.h>
-                #include <sys/uio.h>
-                #include <sys/socket.h>
-                #include <sys/stat.h>
+        }
         #define __submit_prelude \
 …
                 park( __cfaabi_dbg_ctx ); \
                 return data.result;
+#endif
+// Some forward declarations
+// Hand-written prototypes for the blocking calls used as fallbacks by the
+// async_* wrappers, declared here instead of including each call's own header.
+// NOTE(review): several prototypes spell their offset/length parameters with
+// fixed-width integer types (sync_file_range, fallocate, posix_fadvise) where
+// the system headers use off_t/off64_t, and socklen_t is used while only
+// <sys/types.h> is included here -- confirm these match the platform headers.
+extern "C" {
+        #include <sys/types.h>
+        // Vectored file I/O
+        struct iovec;
+        extern ssize_t preadv2 (int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+        extern ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);
+        // Flushing
+        extern int fsync(int fd);
+        extern int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags);
+        // Sockets
+        struct msghdr;
+        struct sockaddr;
+        extern ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
+        extern ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
+        extern ssize_t send(int sockfd, const void *buf, size_t len, int flags);
+        extern ssize_t recv(int sockfd, void *buf, size_t len, int flags);
+        extern int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
+        extern int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+        // Allocation and advice
+        extern int fallocate(int fd, int mode, uint64_t offset, uint64_t len);
+        extern int posix_fadvise(int fd, uint64_t offset, uint64_t len, int advice);
+        extern int madvise(void *addr, size_t length, int advice);
+        // Open, close, stat and plain read
+        extern int openat(int dirfd, const char *pathname, int flags, mode_t mode);
+        extern int close(int fd);
+        struct statx;
+        extern int statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf);
+        extern ssize_t read (int fd, void *buf, size_t count);
+}
 //-----------------------------------------------------------------------------
 // Asynchronous operations
+        // Vectored positional read. Goes through io_uring when IORING_OP_READV is known at
+        // compile time, otherwise calls preadv2() directly.
+        // Note: `flags` is only forwarded on the direct-call path.
+        ssize_t async_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+                #if defined(IORING_OP_READV)
+                        // Queue the entry; __submit_wait is expected to produce the return value.
+                        __submit_prelude
+                        (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
+                        __submit_wait
+                #else
+                        return preadv2(fd, iov, iovcnt, offset, flags);
+                #endif
+        }
+        // Vectored positional write. Goes through io_uring when IORING_OP_WRITEV is known at
+        // compile time, otherwise calls pwritev2() directly.
+        // Note: `flags` is only forwarded on the direct-call path.
+        ssize_t async_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+                #if defined(IORING_OP_WRITEV)
+                        // Queue the entry; __submit_wait is expected to produce the return value.
+                        __submit_prelude
+                        (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
+                        __submit_wait
+                #else
+                        return pwritev2(fd, iov, iovcnt, offset, flags);
+                #endif
+        }
+        // Flushes `fd`. Goes through io_uring when IORING_OP_FSYNC is known at compile time,
+        // otherwise calls fsync() directly.
+        int async_fsync(int fd) {
+                #if defined(IORING_OP_FSYNC)
+                        // Queue the entry; __submit_wait is expected to produce the return value.
+                        __submit_prelude
+                        (*sqe){ IORING_OP_FSYNC, fd };
+                        __submit_wait
+                #else
+                        return fsync(fd);
+                #endif
+        }
+        // Syncs a byte range of `fd`. Goes through io_uring when IORING_OP_SYNC_FILE_RANGE is
+        // known at compile time, otherwise calls sync_file_range() directly.
+        int async_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags) {
+                #if defined(IORING_OP_SYNC_FILE_RANGE)
+                        __submit_prelude
+                        (*sqe){ IORING_OP_SYNC_FILE_RANGE, fd };
+                        // Range and flags are written into the entry after construction.
+                        sqe->sync_range_flags = flags;
+                        sqe->len = nbytes;
+                        sqe->off = offset;
+                        __submit_wait
+                #else
+                        return sync_file_range(fd, offset, nbytes, flags);
+                #endif
+        }
+        // Sends a message on a socket. Goes through io_uring when IORING_OP_SENDMSG is known
+        // at compile time, otherwise calls sendmsg() directly.
+        ssize_t async_sendmsg(int sockfd, const struct msghdr *msg, int flags) {
+                #if !defined(IORING_OP_SENDMSG)
+                        // Direct path: the matching blocking call for this operation.
+                        return sendmsg(sockfd, msg, flags);
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_SENDMSG, sockfd, msg, 1, 0 };
+                        sqe->msg_flags = flags;
+                        __submit_wait
+                #endif
+        }
+        // Receives a message from a socket. Goes through io_uring when IORING_OP_RECVMSG is
+        // known at compile time, otherwise calls recvmsg() directly.
+        ssize_t async_recvmsg(int sockfd, struct msghdr *msg, int flags) {
+                #if !defined(IORING_OP_RECVMSG)
+                        // Direct path: the matching blocking call for this operation.
+                        return recvmsg(sockfd, msg, flags);
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_RECVMSG, sockfd, msg, 1, 0 };
+                        sqe->msg_flags = flags;
+                        __submit_wait
+                #endif
+        }
+        // Sends `len` bytes from `buf` on a socket. Goes through io_uring when IORING_OP_SEND
+        // is known at compile time, otherwise calls send() directly.
+        ssize_t async_send(int sockfd, const void *buf, size_t len, int flags) {
+                #if defined(IORING_OP_SEND)
+                        __submit_prelude
+                        (*sqe){ IORING_OP_SEND, sockfd };
+                        // Buffer, size and flags are written into the entry after construction.
+                        sqe->msg_flags = flags;
+                        sqe->len = len;
+                        sqe->addr = (uint64_t)buf;
+                        __submit_wait
+                #else
+                        return send( sockfd, buf, len, flags );
+                #endif
+        }
+        // Receives up to `len` bytes into `buf` from a socket. Goes through io_uring when
+        // IORING_OP_RECV is known at compile time, otherwise calls recv() directly.
+        ssize_t async_recv(int sockfd, void *buf, size_t len, int flags) {
+                #if defined(IORING_OP_RECV)
+                        __submit_prelude
+                        (*sqe){ IORING_OP_RECV, sockfd };
+                        // Buffer, size and flags are written into the entry after construction.
+                        sqe->msg_flags = flags;
+                        sqe->len = len;
+                        sqe->addr = (uint64_t)buf;
+                        __submit_wait
+                #else
+                        return recv( sockfd, buf, len, flags );
+                #endif
+        }
+        // Accepts a connection on a listening socket. Goes through io_uring when
+        // IORING_OP_ACCEPT is known at compile time, otherwise calls accept4() directly.
+        int async_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
+                #if !defined(IORING_OP_ACCEPT)
+                        // accept4 is declared with the __SOCKADDR_ARG union, so wrap the pointer.
+                        __SOCKADDR_ARG _addr;
+                        _addr.__sockaddr__ = addr;
+                        return accept4( sockfd, _addr, addrlen, flags );
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_ACCEPT, sockfd };
+                        // The entry stores both pointers as 64-bit integers, so convert explicitly
+                        // like the other operations in this file do.
+                        sqe->addr = (uint64_t)addr;
+                        sqe->addr2 = (uint64_t)addrlen;
+                        sqe->accept_flags = flags;
+                        __submit_wait
+                #endif
+        }
+        // Connects a socket to `addr`. Goes through io_uring when IORING_OP_CONNECT is known
+        // at compile time, otherwise calls connect() directly.
+        int async_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
+                #if defined(IORING_OP_CONNECT)
+                        __submit_prelude
+                        (*sqe){ IORING_OP_CONNECT, sockfd };
+                        // The address length is carried in the entry's `off` field.
+                        sqe->off = addrlen;
+                        sqe->addr = (uint64_t)addr;
+                        __submit_wait
+                #else
+                        // connect is declared with the __CONST_SOCKADDR_ARG union, so wrap the pointer.
+                        __CONST_SOCKADDR_ARG _addr;
+                        _addr.__sockaddr__ = addr;
+                        return connect( sockfd, _addr, addrlen );
+                #endif
+        }
+        // Manipulates the allocated space of `fd`. Goes through io_uring when
+        // IORING_OP_FALLOCATE is known at compile time, otherwise calls fallocate() directly.
+        int async_fallocate(int fd, int mode, uint64_t offset, uint64_t len) {
+                #if !defined(IORING_OP_FALLOCATE)
+                        return fallocate( fd, mode, offset, len );
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_FALLOCATE, fd };
+                        // Operand layout mirrors liburing's io_uring_prep_fallocate:
+                        // off = offset, addr = byte count, len = mode.
+                        sqe->off = offset;
+                        sqe->addr = len;
+                        sqe->len = mode;
+                        __submit_wait
+                #endif
+        }
+        // Declares an access pattern for a byte range of `fd`. Goes through io_uring when
+        // IORING_OP_FADVISE is known at compile time, otherwise calls posix_fadvise() directly.
+        int async_fadvise(int fd, uint64_t offset, uint64_t len, int advice) {
+                #if !defined(IORING_OP_FADVISE)
+                        return posix_fadvise( fd, offset, len, advice );
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_FADVISE, fd };
+                        sqe->off = (uint64_t)offset;
+                        // The range size is the `len` parameter of this function.
+                        sqe->len = len;
+                        sqe->fadvise_advice = advice;
+                        __submit_wait
+                #endif
+        }
+        // Gives advice about a memory range. Goes through io_uring when IORING_OP_MADVISE is
+        // known at compile time, otherwise calls madvise() directly.
+        int async_madvise(void *addr, size_t length, int advice) {
+                #if defined(IORING_OP_MADVISE)
+                        __submit_prelude
+                        // No file descriptor is involved; the entry is built with fd 0.
+                        (*sqe){ IORING_OP_MADVISE, 0 };
+                        sqe->fadvise_advice = advice;
+                        sqe->len = length;
+                        sqe->addr = (uint64_t)addr;
+                        __submit_wait
+                #else
+                        return madvise( addr, length, advice );
+                #endif
+        }
+        // Opens `pathname` relative to `dirfd`. Goes through io_uring when IORING_OP_OPENAT is
+        // known at compile time, otherwise calls openat() directly.
+        int async_openat(int dirfd, const char *pathname, int flags, mode_t mode) {
+                #if !defined(IORING_OP_OPENAT)
+                        return openat( dirfd, pathname, flags, mode );
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_OPENAT, dirfd };
+                        sqe->addr = (uint64_t)pathname;
+                        sqe->open_flags = flags;
+                        // The creation mode travels in `len`, as in liburing's io_uring_prep_openat.
+                        sqe->len = mode;
+                        __submit_wait
+                #endif
+        }
+        // Closes `fd`. Goes through io_uring when IORING_OP_CLOSE is known at compile time,
+        // otherwise calls close() directly.
+        int async_close(int fd) {
+                #if defined(IORING_OP_CLOSE)
+                        // Queue the entry; __submit_wait is expected to produce the return value.
+                        __submit_prelude
+                        (*sqe){ IORING_OP_CLOSE, fd };
+                        __submit_wait
+                #else
+                        return close( fd );
+                #endif
+        }
+        // Retrieves extended file status. Goes through io_uring when IORING_OP_STATX is known
+        // at compile time, otherwise issues the statx system call directly.
+        int async_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf) {
+                #if !defined(IORING_OP_STATX)
+                        // Raw system call instead of the statx() wrapper (presumably because the libc
+                        // wrapper is not always available); the number must be statx's own.
+                        return syscall( __NR_statx, dirfd, pathname, flags, mask, statxbuf );
+                #else
+                        __submit_prelude
+                        (*sqe){ IORING_OP_STATX, dirfd };
+                        sqe->addr = (uint64_t)pathname;
+                        sqe->statx_flags = flags;
+                        sqe->len = mask;
+                        // The output buffer pointer is carried in the entry's `off` field.
+                        sqe->off = (uint64_t)statxbuf;
+                        __submit_wait
+                #endif
+        }
+        // Reads up to `count` bytes from `fd` into `buf`. Goes through io_uring when
+        // IORING_OP_READ is known at compile time, otherwise calls read() directly.
+        ssize_t async_read(int fd, void *buf, size_t count) {
+                #if defined(IORING_OP_READ)
+                        __submit_prelude
+                        // The entry is built with an offset of 0.
+                        (*sqe){ IORING_OP_READ, fd, buf, count, 0 };
+                        __submit_wait
+                #else
+                        return read( fd, buf, count );
+                #endif
+        }
+        // Writes up to `count` bytes from `buf` to `fd`. Goes through io_uring when
+        // IORING_OP_WRITE is known at compile time, otherwise calls write() directly.
+        ssize_t async_write(int fd, void *buf, size_t count) {
+                #if !defined(IORING_OP_WRITE)
+                        // Direct path: the matching blocking call for this operation.
+                        return write( fd, buf, count );
+                #else
+                        __submit_prelude
+                        // The entry is built with an offset of 0.
+                        (*sqe){ IORING_OP_WRITE, fd, buf, count, 0 };
+                        __submit_wait
+                #endif
+        }
+// Vectored positional read. Uses io_uring only when both the header (HAVE_LINUX_IO_URING_H)
+// and IORING_OP_READV are available at compile time; otherwise calls preadv2() directly.
+// Note: `flags` is only forwarded on the direct-call path.
+ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_READV)
+                // Queue the entry; __submit_wait is expected to produce the return value.
+                __submit_prelude
+                (*sqe){ IORING_OP_READV, fd, iov, iovcnt, offset };
+                __submit_wait
+        #else
+                return preadv2(fd, iov, iovcnt, offset, flags);
+        #endif
+}
+// Vectored positional write. Uses io_uring only when both the header (HAVE_LINUX_IO_URING_H)
+// and IORING_OP_WRITEV are available at compile time; otherwise calls pwritev2() directly.
+// Note: `flags` is only forwarded on the direct-call path.
+ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_WRITEV)
+                // Queue the entry; __submit_wait is expected to produce the return value.
+                __submit_prelude
+                (*sqe){ IORING_OP_WRITEV, fd, iov, iovcnt, offset };
+                __submit_wait
+        #else
+                return pwritev2(fd, iov, iovcnt, offset, flags);
+        #endif
+}
+// Flushes `fd`. Uses io_uring only when both the header (HAVE_LINUX_IO_URING_H) and
+// IORING_OP_FSYNC are available at compile time; otherwise calls fsync() directly.
+int cfa_fsync(int fd) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_FSYNC)
+                // Queue the entry; __submit_wait is expected to produce the return value.
+                __submit_prelude
+                (*sqe){ IORING_OP_FSYNC, fd };
+                __submit_wait
+        #else
+                return fsync(fd);
+        #endif
+}
+// Syncs a byte range of `fd`. Uses io_uring only when both the header (HAVE_LINUX_IO_URING_H)
+// and IORING_OP_SYNC_FILE_RANGE are available at compile time; otherwise calls
+// sync_file_range() directly.
+int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_SYNC_FILE_RANGE)
+                __submit_prelude
+                (*sqe){ IORING_OP_SYNC_FILE_RANGE, fd };
+                // Range and flags are written into the entry after construction.
+                sqe->sync_range_flags = flags;
+                sqe->len = nbytes;
+                sqe->off = offset;
+                __submit_wait
+        #else
+                return sync_file_range(fd, offset, nbytes, flags);
+        #endif
+}
+// Sends a message on a socket. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_SENDMSG are available at compile time;
+// otherwise calls sendmsg() directly.
+ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_SENDMSG)
+                // Direct path: the matching blocking call for this operation.
+                return sendmsg(sockfd, msg, flags);
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_SENDMSG, sockfd, msg, 1, 0 };
+                sqe->msg_flags = flags;
+                __submit_wait
+        #endif
+}
+// Receives a message from a socket. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_RECVMSG are available at compile time;
+// otherwise calls recvmsg() directly.
+ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_RECVMSG)
+                // Direct path: the matching blocking call for this operation.
+                return recvmsg(sockfd, msg, flags);
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_RECVMSG, sockfd, msg, 1, 0 };
+                sqe->msg_flags = flags;
+                __submit_wait
+        #endif
+}
+// Sends `len` bytes from `buf` on a socket. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_SEND are available at compile time;
+// otherwise calls send() directly.
+ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_SEND)
+                __submit_prelude
+                (*sqe){ IORING_OP_SEND, sockfd };
+                // Buffer, size and flags are written into the entry after construction.
+                sqe->msg_flags = flags;
+                sqe->len = len;
+                sqe->addr = (uint64_t)buf;
+                __submit_wait
+        #else
+                return send( sockfd, buf, len, flags );
+        #endif
+}
+// Receives up to `len` bytes into `buf` from a socket. Uses io_uring only when both the
+// header (HAVE_LINUX_IO_URING_H) and IORING_OP_RECV are available at compile time;
+// otherwise calls recv() directly.
+ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_RECV)
+                __submit_prelude
+                (*sqe){ IORING_OP_RECV, sockfd };
+                // Buffer, size and flags are written into the entry after construction.
+                sqe->msg_flags = flags;
+                sqe->len = len;
+                sqe->addr = (uint64_t)buf;
+                __submit_wait
+        #else
+                return recv( sockfd, buf, len, flags );
+        #endif
+}
+// Accepts a connection on a listening socket. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_ACCEPT are available at compile time;
+// otherwise calls accept4() directly.
+int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_ACCEPT)
+                return accept4( sockfd, addr, addrlen, flags );
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_ACCEPT, sockfd };
+                // The entry stores both pointers as 64-bit integers, so convert explicitly
+                // like the other operations in this file do.
+                sqe->addr = (uint64_t)addr;
+                sqe->addr2 = (uint64_t)addrlen;
+                sqe->accept_flags = flags;
+                __submit_wait
+        #endif
+}
+// Connects a socket to `addr`. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_CONNECT are available at compile time;
+// otherwise calls connect() directly.
+int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_CONNECT)
+                __submit_prelude
+                (*sqe){ IORING_OP_CONNECT, sockfd };
+                // The address length is carried in the entry's `off` field.
+                sqe->off = addrlen;
+                sqe->addr = (uint64_t)addr;
+                __submit_wait
+        #else
+                return connect( sockfd, addr, addrlen );
+        #endif
+}
+// Manipulates the allocated space of `fd`. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_FALLOCATE are available at compile time;
+// otherwise calls fallocate() directly.
+int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_FALLOCATE)
+                return fallocate( fd, mode, offset, len );
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_FALLOCATE, fd };
+                // Operand layout mirrors liburing's io_uring_prep_fallocate:
+                // off = offset, addr = byte count, len = mode.
+                sqe->off = offset;
+                sqe->addr = len;
+                sqe->len = mode;
+                __submit_wait
+        #endif
+}
+// Declares an access pattern for a byte range of `fd`. Uses io_uring only when both the
+// header (HAVE_LINUX_IO_URING_H) and IORING_OP_FADVISE are available at compile time;
+// otherwise calls posix_fadvise() directly.
+int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_FADVISE)
+                return posix_fadvise( fd, offset, len, advice );
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_FADVISE, fd };
+                sqe->off = (uint64_t)offset;
+                // The range size is the `len` parameter of this function.
+                sqe->len = len;
+                sqe->fadvise_advice = advice;
+                __submit_wait
+        #endif
+}
+// Gives advice about a memory range. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_MADVISE are available at compile time;
+// otherwise calls madvise() directly.
+int cfa_madvise(void *addr, size_t length, int advice) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_MADVISE)
+                __submit_prelude
+                // No file descriptor is involved; the entry is built with fd 0.
+                (*sqe){ IORING_OP_MADVISE, 0 };
+                sqe->fadvise_advice = advice;
+                sqe->len = length;
+                sqe->addr = (uint64_t)addr;
+                __submit_wait
+        #else
+                return madvise( addr, length, advice );
+        #endif
+}
+// Opens `pathname` relative to `dirfd`. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_OPENAT are available at compile time;
+// otherwise calls openat() directly.
+int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_OPENAT)
+                return openat( dirfd, pathname, flags, mode );
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_OPENAT, dirfd };
+                sqe->addr = (uint64_t)pathname;
+                sqe->open_flags = flags;
+                // The creation mode travels in `len`, as in liburing's io_uring_prep_openat.
+                sqe->len = mode;
+                __submit_wait
+        #endif
+}
+// Closes `fd`. Uses io_uring only when both the header (HAVE_LINUX_IO_URING_H) and
+// IORING_OP_CLOSE are available at compile time; otherwise calls close() directly.
+int cfa_close(int fd) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_CLOSE)
+                // Queue the entry; __submit_wait is expected to produce the return value.
+                __submit_prelude
+                (*sqe){ IORING_OP_CLOSE, fd };
+                __submit_wait
+        #else
+                return close( fd );
+        #endif
+}
+// Retrieves extended file status. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_STATX are available at compile time;
+// otherwise issues the statx system call directly.
+int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_STATX)
+                // Raw system call instead of the statx() wrapper (presumably because the libc
+                // wrapper is not always available); the number must be statx's own.
+                return syscall( __NR_statx, dirfd, pathname, flags, mask, statxbuf );
+        #else
+                __submit_prelude
+                (*sqe){ IORING_OP_STATX, dirfd };
+                sqe->addr = (uint64_t)pathname;
+                sqe->statx_flags = flags;
+                sqe->len = mask;
+                // The output buffer pointer is carried in the entry's `off` field.
+                sqe->off = (uint64_t)statxbuf;
+                __submit_wait
+        #endif
+}
+// Reads up to `count` bytes from `fd` into `buf`. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_READ are available at compile time;
+// otherwise calls read() directly.
+ssize_t cfa_read(int fd, void *buf, size_t count) {
+        #if defined(HAVE_LINUX_IO_URING_H) && defined(IORING_OP_READ)
+                __submit_prelude
+                // The entry is built with an offset of 0.
+                (*sqe){ IORING_OP_READ, fd, buf, count, 0 };
+                __submit_wait
+        #else
+                return read( fd, buf, count );
+        #endif
+}
+// Writes up to `count` bytes from `buf` to `fd`. Uses io_uring only when both the header
+// (HAVE_LINUX_IO_URING_H) and IORING_OP_WRITE are available at compile time;
+// otherwise calls write() directly.
+ssize_t cfa_write(int fd, void *buf, size_t count) {
+        #if !defined(HAVE_LINUX_IO_URING_H) || !defined(IORING_OP_WRITE)
+                // Direct path: the matching blocking call for this operation.
+                return write( fd, buf, count );
+        #else
+                __submit_prelude
+                // The entry is built with an offset of 0.
+                (*sqe){ IORING_OP_WRITE, fd, buf, count, 0 };
+                __submit_wait
+        #endif
+}
 //-----------------------------------------------------------------------------
 …
 // Macro magic to reduce the size of the following switch case
         #define IS_DEFINED_APPLY(f, ...) f(__VA_ARGS__)
         #define IS_DEFINED_SECOND(first, second, ...) second
         #define IS_DEFINED_TEST(expansion) _CFA_IO_FEATURE_##expansion
         #define IS_DEFINED(macro) IS_DEFINED_APPLY( IS_DEFINED_SECOND,IS_DEFINED_TEST(macro) false, true)
         bool is_async( fptr_t func ) {
+#define IS_DEFINED_APPLY(f, ...) f(__VA_ARGS__)
+#define IS_DEFINED_SECOND(first, second, ...) second
+#define IS_DEFINED_TEST(expansion) _CFA_IO_FEATURE_##expansion
+#define IS_DEFINED(macro) IS_DEFINED_APPLY( IS_DEFINED_SECOND,IS_DEFINED_TEST(macro) false, true)
+bool has_user_level_blocking( fptr_t func ) {
+        #if defined(HAVE_LINUX_IO_URING_H)
                 if( /*func == (fptr_t)preadv2 || */
                         func == (fptr_t)async_preadv2 )
+                        func == (fptr_t)cfa_preadv2 )
                         #define _CFA_IO_FEATURE_IORING_OP_READV ,
                         return IS_DEFINED(IORING_OP_READV);
                 if( /*func == (fptr_t)pwritev2 || */
                       func == (fptr_t)async_pwritev2 )
+                        func == (fptr_t)cfa_pwritev2 )
                         #define _CFA_IO_FEATURE_IORING_OP_WRITEV ,
                         return IS_DEFINED(IORING_OP_WRITEV);
                 if( /*func == (fptr_t)fsync || */
                       func == (fptr_t)async_fsync )
+                        func == (fptr_t)cfa_fsync )
                         #define _CFA_IO_FEATURE_IORING_OP_FSYNC ,
                         return IS_DEFINED(IORING_OP_FSYNC);
                 if( /*func == (fptr_t)ync_file_range || */
                       func == (fptr_t)async_sync_file_range )
+                        func == (fptr_t)cfa_sync_file_range )
                         #define _CFA_IO_FEATURE_IORING_OP_SYNC_FILE_RANGE ,
                         return IS_DEFINED(IORING_OP_SYNC_FILE_RANGE);
                 if( /*func == (fptr_t)sendmsg || */
                       func == (fptr_t)async_sendmsg )
+                        func == (fptr_t)cfa_sendmsg )
                         #define _CFA_IO_FEATURE_IORING_OP_SENDMSG ,
                         return IS_DEFINED(IORING_OP_SENDMSG);
                 if( /*func == (fptr_t)recvmsg || */
                       func == (fptr_t)async_recvmsg )
+                        func == (fptr_t)cfa_recvmsg )
                         #define _CFA_IO_FEATURE_IORING_OP_RECVMSG ,
                         return IS_DEFINED(IORING_OP_RECVMSG);
                 if( /*func == (fptr_t)send || */
                         func == (fptr_t)async_send )
+                        func == (fptr_t)cfa_send )
                         #define _CFA_IO_FEATURE_IORING_OP_SEND ,
                         return IS_DEFINED(IORING_OP_SEND);
                 if( /*func == (fptr_t)recv || */
                         func == (fptr_t)async_recv )
+                        func == (fptr_t)cfa_recv )
                         #define _CFA_IO_FEATURE_IORING_OP_RECV ,
                         return IS_DEFINED(IORING_OP_RECV);
                 if( /*func == (fptr_t)accept4 || */
                         func == (fptr_t)async_accept4 )
+                        func == (fptr_t)cfa_accept4 )
                         #define _CFA_IO_FEATURE_IORING_OP_ACCEPT ,
                         return IS_DEFINED(IORING_OP_ACCEPT);
                 if( /*func == (fptr_t)connect || */
                         func == (fptr_t)async_connect )
+                        func == (fptr_t)cfa_connect )
                         #define _CFA_IO_FEATURE_IORING_OP_CONNECT ,
                         return IS_DEFINED(IORING_OP_CONNECT);
                 if( /*func == (fptr_t)fallocate || */
                         func == (fptr_t)async_fallocate )
+                        func == (fptr_t)cfa_fallocate )
                         #define _CFA_IO_FEATURE_IORING_OP_FALLOCATE ,
                         return IS_DEFINED(IORING_OP_FALLOCATE);
                 if( /*func == (fptr_t)fadvise || */
                         func == (fptr_t)async_fadvise )
+                if( /*func == (fptr_t)posix_fadvise || */
+                        func == (fptr_t)cfa_fadvise )
                         #define _CFA_IO_FEATURE_IORING_OP_FADVISE ,
                         return IS_DEFINED(IORING_OP_FADVISE);
                 if( /*func == (fptr_t)madvise || */
                         func == (fptr_t)async_madvise )
+                        func == (fptr_t)cfa_madvise )
                         #define _CFA_IO_FEATURE_IORING_OP_MADVISE ,
                         return IS_DEFINED(IORING_OP_MADVISE);
                 if( /*func == (fptr_t)openat || */
                         func == (fptr_t)async_openat )
+                        func == (fptr_t)cfa_openat )
                         #define _CFA_IO_FEATURE_IORING_OP_OPENAT ,
                         return IS_DEFINED(IORING_OP_OPENAT);
                 if( /*func == (fptr_t)close || */
                         func == (fptr_t)async_close )
+                        func == (fptr_t)cfa_close )
                         #define _CFA_IO_FEATURE_IORING_OP_CLOSE ,
                         return IS_DEFINED(IORING_OP_CLOSE);
                 if( /*func == (fptr_t)statx || */
                         func == (fptr_t)async_statx )
+                        func == (fptr_t)cfa_statx )
                         #define _CFA_IO_FEATURE_IORING_OP_STATX ,
                         return IS_DEFINED(IORING_OP_STATX);
                 if( /*func == (fptr_t)read || */
                       func == (fptr_t)async_read )
+                        func == (fptr_t)cfa_read )
                         #define _CFA_IO_FEATURE_IORING_OP_READ ,
                         return IS_DEFINED(IORING_OP_READ);
                 if( /*func == (fptr_t)write || */
                       func == (fptr_t)async_write )
+                        func == (fptr_t)cfa_write )
                         #define _CFA_IO_FEATURE_IORING_OP_WRITE ,
                         return IS_DEFINED(IORING_OP_WRITE);
+                return false;
+        }
+#endif
+        #endif
+        return false;
+}

libcfa/src/concurrency/kernel.cfa

-              r6091b88a
+              rd45ed83
 #define __cforall_thread__
+// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
 //C Includes
 …
 #include "invoke.h"
 //-----------------------------------------------------------------------------
 // Some assembly required
 …
         idle{};
         __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", &this);
+        __cfadbg_print_safe(runtime_core, "Kernel : Starting core %p\n", &this);
         this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
         __cfaabi_dbg_print_safe("Kernel : core %p started\n", &this);
+        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
+}
 void ^?{}(processor & this) with( this ){
         if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
                 __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
+                __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this);
                 __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
 …
         ready_queue_lock{};
+        #if !defined(__CFA_NO_STATISTICS__)
+                print_stats = false;
+        #endif
         procs{ __get };
         idles{ __get };
         threads{ __get };
         __kernel_io_startup( this );
+        __kernel_io_startup( this, &this == mainCluster );
         doregister(this);
 …
 void ^?{}(cluster & this) {
         __kernel_io_shutdown( this );
+        __kernel_io_shutdown( this, &this == mainCluster );
         unregister(this);
 …
         verify(this);
         __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
+        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
         doregister(this->cltr, this);
 …
                 preemption_scope scope = { this };
                 __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
+                __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
                 $thread * readyThread = 0p;
 …
+                }
                 __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
+                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
+        }
 …
         V( this->terminated );
         __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
+        __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
         // HACK : the coroutine context switch expects this_thread to be set
 …
         //We now have a proper context from which to schedule threads
         __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
+        __cfadbg_print_safe(runtime_core, "Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
         // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
 …
         // Main routine of the core returned, the core is now fully terminated
         __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
+        __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
         return 0p;
 …
 static void __kernel_startup(void) {
         verify( ! kernelTLS.preemption_state.enabled );
         __cfaabi_dbg_print_safe("Kernel : Starting\n");
+        __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
         __page_size = sysconf( _SC_PAGESIZE );
 …
         (*mainCluster){"Main Cluster"};
         __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
+        __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n");
         // Start by initializing the main thread
 …
         (*mainThread){ &info };
         __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
+        __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n");
 …
                 runner{ &this };
                 __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
+                __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
+        }
 …
         // THE SYSTEM IS NOW COMPLETELY RUNNING
+        __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
+        // Now that the system is up, finish creating systems that need threading
+        __kernel_io_finish_start( *mainCluster );
+        __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
         verify( ! kernelTLS.preemption_state.enabled );
 …
 static void __kernel_shutdown(void) {
+        __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
+        //Before we start shutting things down, wait for systems that need threading to shutdown
+        __kernel_io_prepare_stop( *mainCluster );
         /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
         disable_interrupts();
         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
+        __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
         // SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
 …
         ^(__cfa_dbg_global_clusters.lock){};
         __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
+        __cfadbg_print_safe(runtime_core, "Kernel : Shutdown complete\n");
+}
 …
         // We are ready to sleep
         __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
+        __cfadbg_print_safe(runtime_core, "Kernel : Processor %p ready to sleep\n", this);
         wait( idle );
         // We have woken up
         __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
+        __cfadbg_print_safe(runtime_core, "Kernel : Processor %p woke up and ready to run\n", this);
         // Get ourself off the idle list
 …
 static bool __wake_one(cluster * this, __attribute__((unused)) bool force) {
         // if we don't want to force check if we know it's false
         if( !this->idles.head && !force ) return false;
+        // if( !this->idles.head && !force ) return false;
         // First, lock the cluster idle
 …
         // Wake them up
+        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this->idles.head);
         post( this->idles.head->idle );
 …
 // Unconditionally wake a thread
 static bool __wake_proc(processor * this) {
+        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
         return post( this->idle );
+}
 …
         // make new owner
         unpark( thrd __cfaabi_dbg_ctx2 );
+        return thrd != 0p;
+}
+bool V(semaphore & this, unsigned diff) with( this ) {
+        $thread * thrd = 0p;
+        lock( lock __cfaabi_dbg_ctx2 );
+        int release = max(-count, (int)diff);
+        count += diff;
+        for(release) {
+                unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
+        }
+        unlock( lock );
         return thrd != 0p;

libcfa/src/concurrency/kernel.hfa

-              r6091b88a
+              rd45ed83
 void   P (semaphore & this);
 bool   V (semaphore & this);
+bool   V (semaphore & this, unsigned count);
 …
         // Like head/tail but not seen by the kernel
         volatile uint32_t alloc;
+        volatile uint32_t ready;
         __spinlock_t lock;
 …
         void * ring_ptr;
         size_t ring_sz;
+        // Statistics
+        #if !defined(__CFA_NO_STATISTICS__)
+                struct {
+                        struct {
+                                unsigned long long int val;
+                                unsigned long long int cnt;
+                        } submit_avg;
+                } stats;
+        #endif
 };
 …
         void * ring_ptr;
         size_t ring_sz;
+};
+        // Statistics
+        #if !defined(__CFA_NO_STATISTICS__)
+                struct {
+                        struct {
+                                unsigned long long int val;
+                                unsigned long long int cnt;
+                        } completed_avg;
+                } stats;
+        #endif
+};
+#if defined(__CFA_IO_POLLING_USER__)
+        struct __io_poller_fast {
+                struct io_ring * ring;
+                $thread thrd;
+        };
+#endif
 struct io_ring {
 …
         uint32_t flags;
         int fd;
+        pthread_t poller;
+        void * stack;
+        semaphore submit;
         volatile bool done;
+        semaphore submit;
+        struct {
+                struct {
+                        void * stack;
+                        pthread_t kthrd;
+                } slow;
+                #if defined(__CFA_IO_POLLING_USER__)
+                        __io_poller_fast fast;
+                        __bin_sem_t sem;
+                #endif
+        } poller;
 };
 #endif
 …
                 struct io_ring io;
         #endif
+        #if !defined(__CFA_NO_STATISTICS__)
+                bool print_stats;
+        #endif
 };
 extern Duration default_preemption();
 …
 static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
 static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
+#if !defined(__CFA_NO_STATISTICS__)
+        static inline void print_stats_at_exit( cluster & this ) {
+                this.print_stats = true;
+        }
+#endif
 // Local Variables: //

libcfa/src/concurrency/kernel_private.hfa

-              r6091b88a
+              rd45ed83
 //-----------------------------------------------------------------------------
 // I/O
+void __kernel_io_startup ( cluster & );
+void __kernel_io_shutdown( cluster & );
+void __kernel_io_startup     ( cluster &, bool );
+void __kernel_io_finish_start( cluster & );
+void __kernel_io_prepare_stop( cluster & );
+void __kernel_io_shutdown    ( cluster &, bool );
 //-----------------------------------------------------------------------------

libcfa/src/concurrency/preemption.cfa

-              r6091b88a
+              rd45ed83
 // FwdDeclarations : Signal handlers
 static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
+static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
 static void sigHandler_segv     ( __CFA_SIGPARMS__ );
 static void sigHandler_ill      ( __CFA_SIGPARMS__ );
 …
         if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
             abort( "internal error, pthread_sigmask" );
+                abort( "internal error, pthread_sigmask" );
+        }
+}
 …
         // Setup proper signal handlers
         __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
+        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
         signal_block( SIGALRM );
 …
         force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
+}
+static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
+        abort("SIGALRM should never reach the signal handler");
+}

tools/gdb/utils-gdb.py

-              r6091b88a
+              rd45ed83
 class ThreadInfo:
     tid = 0
     cluster = None
     value = None
     def __init__(self, cluster, value):
         self.cluster = cluster
         self.value = value
     def is_system(self):
         return False
+        tid = 0
+        cluster = None
+        value = None
+        def __init__(self, cluster, value):
+                self.cluster = cluster
+                self.value = value
+        def is_system(self):
+                return False
 # A named tuple representing information about a stack
 …
 def is_cforall():
     return True
+        return True
 def get_cfa_types():
     # GDB types for various structures/types in CFA
     return CfaTypes(cluster_ptr = gdb.lookup_type('struct cluster').pointer(),
                   processor_ptr = gdb.lookup_type('struct processor').pointer(),
                      thread_ptr = gdb.lookup_type('struct $thread').pointer(),
                         int_ptr = gdb.lookup_type('int').pointer(),
                    thread_state = gdb.lookup_type('enum coroutine_state'))
+        # GDB types for various structures/types in CFA
+        return CfaTypes(cluster_ptr = gdb.lookup_type('struct cluster').pointer(),
+                                  processor_ptr = gdb.lookup_type('struct processor').pointer(),
+                                         thread_ptr = gdb.lookup_type('struct $thread').pointer(),
+                                                int_ptr = gdb.lookup_type('int').pointer(),
+                                   thread_state = gdb.lookup_type('enum coroutine_state'))
 def get_addr(addr):
     """
     NOTE: sketchy solution to retrieve address. There is a better solution...
     @addr: str of an address that can be in a format 0xfffff <type of the object
     at this address>
     Return: str of just the address
     """
     str_addr = str(addr)
     ending_addr_index = str_addr.find('<')
     if ending_addr_index == -1:
         return str(addr)
     return str_addr[:ending_addr_index].strip()
+        """
+        NOTE: sketchy solution to retrieve address. There is a better solution...
+        @addr: str of an address that can be in a format 0xfffff <type of the object
+        at this address>
+        Return: str of just the address
+        """
+        str_addr = str(addr)
+        ending_addr_index = str_addr.find('<')
+        if ending_addr_index == -1:
+                return str(addr)
+        return str_addr[:ending_addr_index].strip()
 def print_usage(obj):
     print(obj.__doc__)
+        print(obj.__doc__)
 def parse(args):
     """
     Split the argument list in string format, where each argument is separated
     by whitespace delimiter, to a list of arguments like argv
     @args: str of arguments
     Return:
         [] if args is an empty string
         list if args is not empty
     """
     # parse the string format of arguments and return a list of arguments
     argv = args.split(' ')
     if len(argv) == 1 and argv[0] == '':
         return []
     return argv
+        """
+        Split the argument list in string format, where each argument is separated
+        by whitespace delimiter, to a list of arguments like argv
+        @args: str of arguments
+        Return:
+                [] if args is an empty string
+                list if args is not empty
+        """
+        # parse the string format of arguments and return a list of arguments
+        argv = args.split(' ')
+        if len(argv) == 1 and argv[0] == '':
+                return []
+        return argv
 def get_cluster_root():
     """
     Return: gdb.Value of globalClusters.root (is an address)
     """
     cluster_root = gdb.parse_and_eval('_X11mainClusterPS7cluster_1')
     if cluster_root.address == 0x0:
         print('No clusters, program terminated')
     return cluster_root
+        """
+        Return: gdb.Value of globalClusters.root (is an address)
+        """
+        cluster_root = gdb.parse_and_eval('_X11mainClusterPS7cluster_1')
+        if cluster_root.address == 0x0:
+                print('No clusters, program terminated')
+        return cluster_root
 def find_curr_thread():
+    # btstr = gdb.execute('bt', to_string = True).splitlines()
+    # if len(btstr) == 0:
+    #     print('error')
+    #     return None
+    # return btstr[0].split('this=',1)[1].split(',')[0].split(')')[0]
+    return None
+        # btstr = gdb.execute('bt', to_string = True).splitlines()
+        # if len(btstr) == 0:
+        #     print('error')
+        #     return None
+        # return btstr[0].split('this=',1)[1].split(',')[0].split(')')[0]
+        return None
+def all_clusters():
+        if not is_cforall():
+                return None
+        cluster_root = get_cluster_root()
+        if cluster_root.address == 0x0:
+                return
+        curr = cluster_root
+        ret = [curr]
+        while True:
+                curr = curr['_X4nodeS26__cluster____dbg_node_cltr_1']['_X4nextPS7cluster_1']
+                if curr == cluster_root:
+                        break
+                ret.append(curr)
+        return ret
 def lookup_cluster(name = None):
     """
     Look up a cluster given its name
     @name: str
     Return: gdb.Value
     """
     if not is_cforall():
         return None
     root = get_cluster_root()
     if root.address == 0x0:
         return None
     if not name:
         return root
     # lookup for the task associated with the id
     cluster = None
     curr = root
     while True:
         if curr['_X4namePKc_1'].string() == name:
             cluster = curr.address
             break
         curr = curr['_X4nodeS26__cluster____dbg_node_cltr_1']['_X4nextPS7cluster_1']
         if curr == root or curr == 0x0:
             break
     if not cluster:
         print("Cannot find a cluster with the name: {}.".format(name))
         return None
     return cluster
+        """
+        Look up a cluster given its name
+        @name: str
+        Return: gdb.Value
+        """
+        if not is_cforall():
+                return None
+        root = get_cluster_root()
+        if root.address == 0x0:
+                return None
+        if not name:
+                return root
+        # lookup for the task associated with the id
+        cluster = None
+        curr = root
+        while True:
+                if curr['_X4namePKc_1'].string() == name:
+                        cluster = curr.address
+                        break
+                curr = curr['_X4nodeS26__cluster____dbg_node_cltr_1']['_X4nextPS7cluster_1']
+                if curr == root or curr == 0x0:
+                        break
+        if not cluster:
+                print("Cannot find a cluster with the name: {}.".format(name))
+                return None
+        return cluster
 def lookup_threads_by_cluster(cluster):
         # Iterate through a circular linked list of threads and accumulate them in an array
         threads = []
         cfa_t = get_cfa_types()
         root = cluster['_X7threadsS8__dllist_S7$thread__1']['_X4headPY15__TYPE_generic__1'].cast(cfa_t.thread_ptr)
         if root == 0x0 or root.address == 0x0:
             print('There are no tasks for cluster: {}'.format(cluster))
             return threads
         curr = root
         tid = 0
         sid = -1
         while True:
             t = ThreadInfo(cluster, curr)
             if t.is_system():
                 t.tid = sid
                 sid -= 1
             else:
                 t.tid = tid
                 tid += 1
             threads.append(t)
             curr = curr['node']['next']
             if curr == root or curr == 0x0:
                 break
         return threads
+                # Iterate through a circular linked list of threads and accumulate them in an array
+                threads = []
+                cfa_t = get_cfa_types()
+                root = cluster['_X7threadsS8__dllist_S7$thread__1']['_X4headPY15__TYPE_generic__1'].cast(cfa_t.thread_ptr)
+                if root == 0x0 or root.address == 0x0:
+                        print('There are no tasks for cluster: {}'.format(cluster))
+                        return threads
+                curr = root
+                tid = 0
+                sid = -1
+                while True:
+                        t = ThreadInfo(cluster, curr)
+                        if t.is_system():
+                                t.tid = sid
+                                sid -= 1
+                        else:
+                                t.tid = tid
+                                tid += 1
+                        threads.append(t)
+                        curr = curr['node']['next']
+                        if curr == root or curr == 0x0:
+                                break
+                return threads
 def system_thread(thread):
     return False
+        return False
 def adjust_stack(pc, fp, sp):
     # pop sp, fp, pc from global stack
     gdb.execute('set $pc = {}'.format(pc))
     gdb.execute('set $rbp = {}'.format(fp))
     gdb.execute('set $sp = {}'.format(sp))
+        # pop sp, fp, pc from global stack
+        gdb.execute('set $pc = {}'.format(pc))
+        gdb.execute('set $rbp = {}'.format(fp))
+        gdb.execute('set $sp = {}'.format(sp))
 ############################ COMMAND IMPLEMENTATION #########################
 class Clusters(gdb.Command):
     """Cforall: Display currently known clusters
+        """Cforall: Display currently known clusters
 Usage:
     info clusters                 : print out all the clusters
+        info clusters                 : print out all the clusters
 """
+    def __init__(self):
+        super(Clusters, self).__init__('info clusters', gdb.COMMAND_USER)
+    def print_cluster(self, cluster_name, cluster_address):
+        print('{:>20}  {:>20}'.format(cluster_name, cluster_address))
+    #entry point from gdb
+    def invoke(self, arg, from_tty):
+        if not is_cforall():
+            return
+        if arg:
+            print("info clusters does not take arguments")
+            print_usage(self)
+            return
+        cluster_root = get_cluster_root()
+        if cluster_root.address == 0x0:
+            return
+        curr = cluster_root
+        self.print_cluster('Name', 'Address')
+        while True:
+            self.print_cluster(curr['_X4namePKc_1'].string(), str(curr))
+            curr = curr['_X4nodeS26__cluster____dbg_node_cltr_1']['_X4nextPS7cluster_1']
+            if curr == cluster_root:
+                break
+        print("")
+        def __init__(self):
+                super(Clusters, self).__init__('info clusters', gdb.COMMAND_USER)
+        def print_cluster(self, cluster_name, cluster_address):
+                print('{:>20}  {:>20}'.format(cluster_name, cluster_address))
+        #entry point from gdb
+        def invoke(self, arg, from_tty):
+                if not is_cforall():
+                        return
+                if arg:
+                        print("info clusters does not take arguments")
+                        print_usage(self)
+                        return
+                self.print_cluster('Name', 'Address')
+                for c in all_clusters():
+                        self.print_cluster(c['_X4namePKc_1'].string(), str(c))
+                print("")
 ############
 class Processors(gdb.Command):
     """Cforall: Display currently known processors
+        """Cforall: Display currently known processors
 Usage:
+    info processors                 : print out all the processors in the Main Cluster
+    info processors <cluster_name>  : print out all processors in a given cluster
+        info processors                 : print out all the processors in the Main Cluster
+        info processors all             : print out all processors in all clusters
+        info processors <cluster_name>  : print out all processors in a given cluster
 """
+    def __init__(self):
+        super(Processors, self).__init__('info processors', gdb.COMMAND_USER)
+    def print_processor(self, name, status, pending, address):
+        print('{:>20}  {:>11}  {:>13}  {:>20}'.format(name, status, pending, address))
+    def iterate_procs(self, root, active):
+        if root == 0x0:
+            return
+        cfa_t = get_cfa_types()
+        curr = root
+        while True:
+            processor = curr
+            should_stop = processor['_X12do_terminateVb_1']
+            stop_count  = processor['_X10terminatedS9semaphore_1']['_X5counti_1']
+            if not should_stop:
+                status = 'Active' if active else 'Idle'
+            else:
+                status_str  = 'Last Thread' if stop_count >= 0 else 'Terminating'
+                status      = '{}({},{})'.format(status_str, should_stop, stop_count)
+            self.print_processor(processor['_X4namePKc_1'].string(),
+                    status, str(processor['_X18pending_preemptionb_1']), str(processor)
+                )
+            curr = curr['_X4nodeS28__processor____dbg_node_proc_1']['_X4nextPS9processor_1']
+            if curr == root or curr == 0x0:
+                break
+    #entry point from gdb
+    def invoke(self, arg, from_tty):
+        if not is_cforall():
+            return
+        cluster = lookup_cluster(arg if arg else None)
+        if not cluster:
+            print("No Cluster matching arguments found")
+            return
+        cfa_t = get_cfa_types()
+        print('Cluster: "{}"({})'.format(cluster['_X4namePKc_1'].string(), cluster.cast(cfa_t.cluster_ptr)))
+        active_root = cluster.cast(cfa_t.cluster_ptr) \
+                ['_X5procsS8__dllist_S9processor__1'] \
+                ['_X4headPY15__TYPE_generic__1'] \
+                .cast(cfa_t.processor_ptr)
+        idle_root = cluster.cast(cfa_t.cluster_ptr) \
+                ['_X5idlesS8__dllist_S9processor__1'] \
+                ['_X4headPY15__TYPE_generic__1'] \
+                .cast(cfa_t.processor_ptr)
+        if idle_root != 0x0 or active_root != 0x0:
+            self.print_processor('Name', 'Status', 'Pending Yield', 'Address')
+            self.iterate_procs(active_root, True)
+            self.iterate_procs(idle_root, False)
+        else:
+            print("No processors on cluster")
+        print()
+        def __init__(self):
+                super(Processors, self).__init__('info processors', gdb.COMMAND_USER)
+        def print_processor(self, name, status, pending, address):
+                print('{:>20}  {:>11}  {:>13}  {:>20}'.format(name, status, pending, address))
+        def iterate_procs(self, root, active):
+                if root == 0x0:
+                        return
+                cfa_t = get_cfa_types()
+                curr = root
+                while True:
+                        processor = curr
+                        should_stop = processor['_X12do_terminateVb_1']
+                        stop_count  = processor['_X10terminatedS9semaphore_1']['_X5counti_1']
+                        if not should_stop:
+                                status = 'Active' if active else 'Idle'
+                        else:
+                                status_str  = 'Last Thread' if stop_count >= 0 else 'Terminating'
+                                status      = '{}({},{})'.format(status_str, should_stop, stop_count)
+                        self.print_processor(processor['_X4namePKc_1'].string(),
+                                        status, str(processor['_X18pending_preemptionb_1']), str(processor)
+                                )
+                        curr = curr['_X4nodeS28__processor____dbg_node_proc_1']['_X4nextPS9processor_1']
+                        if curr == root or curr == 0x0:
+                                break
+        #entry point from gdb
+        def invoke(self, arg, from_tty):
+                if not is_cforall():
+                        return
+                if not arg:
+                        clusters = [lookup_cluster(None)]
+                elif arg == "all":
+                        clusters = all_clusters()
+                else:
+                        clusters = [lookup_cluster(arg)]
+                if not clusters:
+                        print("No Cluster matching arguments found")
+                        return
+                cfa_t = get_cfa_types()
+                for cluster in clusters:
+                        print('Cluster: "{}"({})'.format(cluster['_X4namePKc_1'].string(), cluster.cast(cfa_t.cluster_ptr)))
+                        active_root = cluster.cast(cfa_t.cluster_ptr) \
+                                        ['_X5procsS8__dllist_S9processor__1'] \
+                                        ['_X4headPY15__TYPE_generic__1'] \
+                                        .cast(cfa_t.processor_ptr)
+                        idle_root = cluster.cast(cfa_t.cluster_ptr) \
+                                        ['_X5idlesS8__dllist_S9processor__1'] \
+                                        ['_X4headPY15__TYPE_generic__1'] \
+                                        .cast(cfa_t.processor_ptr)
+                        if idle_root != 0x0 or active_root != 0x0:
+                                self.print_processor('Name', 'Status', 'Pending Yield', 'Address')
+                                self.iterate_procs(active_root, True)
+                                self.iterate_procs(idle_root, False)
+                        else:
+                                print("No processors on cluster")
+                print()
 ############
 class Threads(gdb.Command):
     """Cforall: Display currently known threads
+        """Cforall: Display currently known threads
 Usage:
+    cfathreads                           : print Main Cluster threads, application threads only
+    cfathreads all                       : print all clusters, all threads
+    cfathreads <clusterName>             : print cluster threads, application threads only
+    """
+    def __init__(self):
+        # The first parameter of the line below is the name of the command. You
+        # can call it 'uc++ task'
+        super(Threads, self).__init__('info cfathreads', gdb.COMMAND_USER)
+    def print_formatted(self, marked, tid, name, state, address):
+        print('{:>1}  {:>4}  {:>20}  {:>10}  {:>20}'.format('*' if marked else ' ', tid, name, state, address))
+    def print_thread(self, thread, tid, marked):
+        cfa_t = get_cfa_types()
+        self.print_formatted(marked, tid, thread['self_cor']['name'].string(), str(thread['state'].cast(cfa_t.thread_state)), str(thread))
+    def print_formatted_cluster(self, str_format, cluster_name, cluster_addr):
+        print(str_format.format(cluster_name, cluster_addr))
+    def print_threads_by_cluster(self, cluster, print_system = False):
+        # Iterate through a circular linked list of tasks and print out its
+        # name along with address associated to each cluster
+        threads = lookup_threads_by_cluster(cluster)
+        if not threads:
+            return
+        running_thread = find_curr_thread()
+        if running_thread is None:
+            print('Could not identify current thread')
+        self.print_formatted(False, '', 'Name', 'State', 'Address')
+        for t in threads:
+            if not t.is_system() or print_system:
+                self.print_thread(t.value, t.tid, t.value == running_thread if running_thread else False)
+        print()
+    def print_all_threads(self):
+        print("Not implemented")
+    def invoke(self, arg, from_tty):
+        """
+        @arg: str
+        @from_tty: bool
+        """
+        if not is_cforall():
+            return
+        if not arg:
+            cluster = lookup_cluster()
+            if not cluster:
+                print("Could not find Main Cluster")
+                return
+            # only tasks and main
+            self.print_threads_by_cluster(cluster, False)
+        elif arg == 'all':
+            # all threads, all clusters
+            self.print_all_threads()
+        else:
+            cluster = lookup_cluster(arg)
+            if not cluster:
+                print("Could not find cluster '{}'".format(arg))
+                return
+            # all tasks, specified cluster
+            self.print_threads_by_cluster(cluster, True)
+        cfathreads                           : print Main Cluster threads, application threads only
+        cfathreads all                       : print all clusters, all threads
+        cfathreads <clusterName>             : print cluster threads, application threads only
+        """
+        def __init__(self):
+                # The first parameter of the line below is the name of the command. You
+                # can call it 'uc++ task'
+                super(Threads, self).__init__('info cfathreads', gdb.COMMAND_USER)
+        def print_formatted(self, marked, tid, name, state, address):
+                print('{:>1}  {:>4}  {:>20}  {:>10}  {:>20}'.format('*' if marked else ' ', tid, name, state, address))
+        def print_thread(self, thread, tid, marked):
+                cfa_t = get_cfa_types()
+                self.print_formatted(marked, tid, thread['self_cor']['name'].string(), str(thread['state'].cast(cfa_t.thread_state)), str(thread))
+        def print_threads_by_cluster(self, cluster, print_system = False):
+                # Iterate through a circular linked list of tasks and print out its
+                # name along with address associated to each cluster
+                threads = lookup_threads_by_cluster(cluster)
+                if not threads:
+                        return
+                running_thread = find_curr_thread()
+                if running_thread is None:
+                        print('Could not identify current thread')
+                self.print_formatted(False, '', 'Name', 'State', 'Address')
+                for t in threads:
+                        if not t.is_system() or print_system:
+                                self.print_thread(t.value, t.tid, t.value == running_thread if running_thread else False)
+                print()
+        def print_all_threads(self):
+                for c in all_clusters():
+                        self.print_threads_by_cluster(c, False)
+        def invoke(self, arg, from_tty):
+                """
+                @arg: str
+                @from_tty: bool
+                """
+                if not is_cforall():
+                        return
+                if not arg:
+                        cluster = lookup_cluster()
+                        if not cluster:
+                                print("Could not find Main Cluster")
+                                return
+                        # only tasks and main
+                        self.print_threads_by_cluster(cluster, False)
+                elif arg == 'all':
+                        # all threads, all clusters
+                        self.print_all_threads()
+                else:
+                        cluster = lookup_cluster(arg)
+                        if not cluster:
+                                print("Could not find cluster '{}'".format(arg))
+                                return
+                        # all tasks, specified cluster
+                        self.print_threads_by_cluster(cluster, True)
 ############
 class Thread(gdb.Command):
+    def __init__(self):
+        # The first parameter of the line below is the name of the command. You
+        # can call it 'uc++ task'
+        super(Threads, self).__init__('cfathread', gdb.COMMAND_USER)
+    def print_usage(self):
+        print_usage("""
+    cfathread                            : print userCluster tasks, application tasks only
+    cfathread <clusterName>              : print cluster tasks, application tasks only
+    cfathread all                        : print all clusters, all tasks
+    cfathread <id>                       : switch stack to thread id on userCluster
+    cfathread 0x<address>                    : switch stack to thread on any cluster
+    cfathread <id> <clusterName>         : switch stack to thread on specified cluster
+    """)
+    ############################ AUXILIARY FUNCTIONS #########################
+    def print_formatted(self, marked, tid, name, state, address):
+        print('{:>1}  {:>4}  {:>20}  {:>10}  {:>20}'.format('*' if marked else ' ', tid, name, state, address))
+    def print_thread(self, thread, tid, marked):
+        cfa_t = get_cfa_types()
+        self.print_formatted(marked, tid, thread['self_cor']['name'].string(), str(thread['state'].cast(cfa_t.thread_state)), str(thread))
+    def print_formatted_cluster(self, str_format, cluster_name, cluster_addr):
+        print(str_format.format(cluster_name, cluster_addr))
+    def print_tasks_by_cluster_all(self, cluster_address):
+        """
+        Display a list of all info about all available tasks on a particular cluster
+        @cluster_address: gdb.Value
+        """
+        cluster_address = cluster_address.cast(uCPPTypes.ucluster_ptr)
+        task_root = cluster_address['tasksOnCluster']['root']
+        if task_root == 0x0 or task_root.address == 0x0:
+            print('There are no tasks for cluster at address: {}'.format(cluster_address))
+            return
+        self.print_formatted_task('', 'Task Name', 'Address', 'State')
+        curr = task_root
+        task_id = 0
+        systask_id = -1
+        breakpoint_addr = self.find_curr_breakpoint_addr()
+        if breakpoint_addr is None:
+            return
+        while True:
+            global SysTask_Name
+            if (curr['task_']['name'].string() in SysTask_Name):
+                self.print_formatted_tasks(systask_id, breakpoint_addr, curr)
+                systask_id -= 1
+            else:
+                self.print_formatted_tasks(task_id, breakpoint_addr, curr)
+                task_id += 1
+            curr = curr['next'].cast(uCPPTypes.uBaseTaskDL_ptr_type)
+            if curr == task_root:
+                break
+    def print_tasks_by_cluster_address_all(self, cluster_address):
+        """
+        Display a list of all info about all available tasks on a particular cluster
+        @cluster_address: str
+        """
+        # Iterate through a circular linked list of tasks and print out its
+        # name along with address associated to each cluster
+        # convert hex string to hex number
+        try:
+            hex_addr = int(cluster_address, 16)
+        except:
+            self.print_usage()
+            return
+        cluster_address = gdb.Value(hex_addr)
+        if not self.print_tasks_by_cluster_all(cluster_address):
+            return
+    def print_threads_by_cluster(self, cluster, print_system = False):
+        """
+        Display a list of limited info about all available threads on a particular cluster
+        @cluster: str
+        @print_system: bool
+        """
+        # Iterate through a circular linked list of tasks and print out its
+        # name along with address associated to each cluster
+        threads = self.threads_by_cluster(cluster)
+        if not threads:
+            return
+        running_thread = self.find_curr_thread()
+        if running_thread is None:
+            print('Could not identify current thread')
+        self.print_formatted(False, '', 'Name', 'State', 'Address')
+        for t in threads:
+            if not t.is_system() or print_system:
+                self.print_thread(t.value, t.tid, t.value == running_thread if running_thread else False)
+        print()
+    ############################ COMMAND FUNCTIONS #########################
+    def print_all_threads(self):
+        """Iterate through each cluster, iterate through all tasks and  print out info about all the tasks
+        in those clusters"""
+        uCPPTypes = None
+        try:
+            uCPPTypes = get_uCPP_types()
+        except gdb.error:
+            print(not_supported_error_msg)
+            print(gdb.error)
+            return
+        cluster_root = get_cluster_root()
+        if cluster_root.address == 0x0:
+            return
+        curr = cluster_root
+        self.print_formatted_cluster(self.cluster_str_format, 'Cluster Name', 'Address')
+        while True:
+            addr = str(curr['cluster_'].reference_value())[1:]
+            self.print_formatted_cluster(self.cluster_str_format, curr['cluster_']['name'].string(), addr)
+            self.print_tasks_by_cluster_address_all(addr)
+            curr = curr['next'].cast(uCPPTypes.uClusterDL_ptr_type)
+            if curr == cluster_root:
+                break
+    def switchto(self, thread):
+        """Change to a new task by switching to a different stack and manually
+        adjusting sp, fp and pc
+        @task_address: str
+supported format:
+                in hex format
+                    <hex_address>: literal hexadecimal address
+                    Ex: 0xffffff
+                in name of the pointer to the task
+                    "task_name": pointer of the variable name of the cluster
+                        Ex: T* s -> task_name = s
+            Return: gdb.value of the cluster's address
+        """
+        # uCPPTypes = None
+        # try:
+        #     uCPPTypes = get_uCPP_types()
+        # except gdb.error:
+        #     print(not_supported_error_msg)
+        #     print(gdb.error)
+        #     return
+        # # Task address has a format "task_address", which implies that it is the
+        # # name of the variable, and it needs to be evaluated
+        # if task_address.startswith('"') and task_address.endswith('"'):
+        #     task = gdb.parse_and_eval(task_address.replace('"', ''))
+        # else:
+        # # Task address format does not include the quotation marks, which implies
+        # # that it is a hex address
+        #     # convert hex string to hex number
+        #     try:
+        #         hex_addr = int(task_address, 16)
+        #     except:
+        #         self.print_usage()
+        #         return
+        #     task_address = gdb.Value(hex_addr)
+        #     task = task_address.cast(uCPPTypes.uBaseTask_ptr_type)
+        try:
+            if not gdb.lookup_symbol('__cfactx_switch'):
+                print('__cfactx_switch symbol is unavailable')
+                return
+        except:
+            print('here 3')
+        cfa_t = get_cfa_types()
+        state = thread['state'].cast(cfa_t.thread_state)
+        try:
+            if state == gdb.parse_and_eval('Halted'):
+                print('Cannot switch to a terminated thread')
+                return
+            if state == gdb.parse_and_eval('Start'):
+                print('Cannjot switch to a thread not yet run')
+                return
+        except:
+            print("here 2")
+            return
+        context = thread['context']
+        # lookup for sp,fp and uSwitch
+        xsp = context['SP'] + 48
+        xfp = context['FP']
+        # convert string so we can strip out the address
+        try:
+            xpc = get_addr(gdb.parse_and_eval('__cfactx_switch').address + 28)
+        except:
+            print("here")
+            return
+        # must be at frame 0 to set pc register
+        gdb.execute('select-frame 0')
+        # push sp, fp, pc into a global stack
+        global STACK
+        sp = gdb.parse_and_eval('$sp')
+        fp = gdb.parse_and_eval('$fp')
+        pc = gdb.parse_and_eval('$pc')
+        stack_info = StackInfo(sp = sp, fp = fp, pc = pc)
+        STACK.append(stack_info)
+        # update registers for new task
+        print('switching to ')
+        gdb.execute('set $rsp={}'.format(xsp))
+        gdb.execute('set $rbp={}'.format(xfp))
+        gdb.execute('set $pc={}'.format(xpc))
+    def find_matching_gdb_thread_id():
+        """
+        Parse the str from info thread to get the number
+        Scans the output of gdb's 'info thread' line by line and returns the number
+        captured from the first line that contains 'this=<task>'.
+        NOTE(review): declared without `self` although it sits among methods, and it
+        reads `task` and `task_state`, neither of which is defined in this function --
+        presumably a leftover from an earlier script; confirm before calling.
+        """
+        info_thread_str = gdb.execute('info thread', to_string=True).splitlines()
+        for thread_str in info_thread_str:
+            if thread_str.find('this={}'.format(task)) != -1:
+                # optional leading '*', whitespace, the numeric id, then the word 'Thread'
+                thread_id_pattern = r'^\*?\s+(\d+)\s+Thread'
+                # retrieve gdb thread id (re.match returning None here would raise AttributeError)
+                return re.match(thread_id_pattern, thread_str).group(1)
+            # check if the task is running or not
+            if task_state == gdb.parse_and_eval('uBaseTask::Running'):
+                # find the equivalent thread from info thread
+                # NOTE(review): this calls the function from inside its own loop -- confirm intent
+                gdb_thread_id = find_matching_gdb_thread_id()
+                if gdb_thread_id is None:
+                    print('cannot find the thread id to switch to')
+                    return
+                # switch to that thread using the gdb 'thread' command
+                gdb.execute('thread {}'.format(gdb_thread_id))
+    def switchto_id(self, tid, cluster):
+        """
+        @cluster: cluster object
+        @tid: int
+        """
+        threads = self.threads_by_cluster( cluster )
+        for t in threads:
+            if t.tid == tid:
+                self.switchto(t.value)
+                return
+        print("Cound not find thread by id '{}'".format(tid))
+    def invoke(self, arg, from_tty):
+        """
+        @arg: str
+        @from_tty: bool
+        """
+        if not is_cforall():
+            return
+        argv = parse(arg)
+        print(argv)
+        if len(argv) == 0:
+            """
+            Iterate only Main Thread, print only tasks and main
+            """
+            cluster = lookup_cluster()
+            if not cluster:
+                print("Could not find Main Cluster")
+                return
+            # only tasks and main
+            self.print_threads_by_cluster(cluster, False)
+        elif len(argv) == 1:
+            if argv[0] == 'help':
+                self.print_usage()
+            # push task
+            elif argv[0].isdigit():
+                cluster = lookup_cluster()
+                if not cluster:
+                    print("Could not find Main Cluster")
+                    return
+                try:
+                    tid = int(argv[0])
+                except:
+                    print("'{}' not a valid thread id".format(argv[0]))
+                    self.print_usage()
+                    return
+                 # by id, userCluster
+                self.switchto_id(tid, cluster)
+            elif argv[0].startswith('0x') or argv[0].startswith('0X'):
+                self.switchto(argv[0]) # by address, any cluster
+            # print tasks
+            elif argv[0] == 'all':
+                self.print_all_threads() # all tasks, all clusters
+            else:
+                """
+                Print out all the tasks available in the specified cluster
+                @cluster_name: str
+                """
+                print("cfathread by name")
+                cluster = lookup_cluster(argv[0])
+                if not cluster:
+                    return
+                # all tasks, specified cluster
+                self.print_threads_by_cluster(cluster, True)
+        elif len(argv) == 2:
+            # push task
+            self.pushtask_by_id(argv[0], argv[1]) # by id, specified cluster
+        else:
+            print('Invalid arguments')
+            self.print_usage()
+        """Cforall: Switch to specified user threads
+Usage:
+        cfathread <id>                       : switch stack to thread id on main cluster
+        cfathread 0x<address>                : switch stack to thread on any cluster
+        cfathread <id> <clusterName>         : switch stack to thread on specified cluster
+        """
+        def __init__(self):
+                """Initialise the base class with the command name 'cfathread' and the category gdb.COMMAND_USER."""
+                # The first argument passed below is the name the command is invoked by;
+                # the second is the command category.
+                super(Thread, self).__init__('cfathread', gdb.COMMAND_USER)
+        ############################ AUXILIARY FUNCTIONS #########################
+        def switchto(self, thread):
+                """Change to a new task by switching to a different stack and manually
+                adjusting sp, fp and pc
+                @task_address: str
+supported format:
+                                in hex format
+                                        <hex_address>: literal hexadecimal address
+                                        Ex: 0xffffff
+                                in name of the pointer to the task
+                                        "task_name": pointer of the variable name of the cluster
+                                                Ex: T* s -> task_name = s
+                        Return: gdb.value of the cluster's address
+                """
+                try:
+                        if not gdb.lookup_symbol('__cfactx_switch'):
+                                print('__cfactx_switch symbol is unavailable')
+                                return
+                except:
+                        print('here 3')
+                cfa_t = get_cfa_types()
+                state = thread['state'].cast(cfa_t.thread_state)
+                try:
+                        if state == gdb.parse_and_eval('Halted'):
+                                print('Cannot switch to a terminated thread')
+                                return
+                        if state == gdb.parse_and_eval('Start'):
+                                print('Cannjot switch to a thread not yet run')
+                                return
+                except:
+                        print("here 2")
+                        return
+                context = thread['context']
+                # lookup for sp,fp and uSwitch
+                xsp = context['SP'] + 48
+                xfp = context['FP']
+                # convert string so we can strip out the address
+                try:
+                        xpc = get_addr(gdb.parse_and_eval('__cfactx_switch').address + 28)
+                except:
+                        print("here")
+                        return
+                # must be at frame 0 to set pc register
+                gdb.execute('select-frame 0')
+                # push sp, fp, pc into a global stack
+                global STACK
+                sp = gdb.parse_and_eval('$sp')
+                fp = gdb.parse_and_eval('$fp')
+                pc = gdb.parse_and_eval('$pc')
+                stack_info = StackInfo(sp = sp, fp = fp, pc = pc)
+                STACK.append(stack_info)
+                # update registers for new task
+                print('switching to ')
+                gdb.execute('set $rsp={}'.format(xsp))
+                gdb.execute('set $rbp={}'.format(xfp))
+                gdb.execute('set $pc={}'.format(xpc))
+        def find_matching_gdb_thread_id():
+                """
+                Parse the str from info thread to get the number
+                Scans the output of gdb's 'info thread' line by line and returns the number
+                captured from the first line that contains 'this=<task>'.
+                NOTE(review): declared without `self` although it sits among methods, and it
+                reads `task` and `task_state`, neither of which is defined in this function --
+                presumably a leftover from an earlier script; confirm before calling.
+                """
+                info_thread_str = gdb.execute('info thread', to_string=True).splitlines()
+                for thread_str in info_thread_str:
+                        if thread_str.find('this={}'.format(task)) != -1:
+                                # optional leading '*', whitespace, the numeric id, then the word 'Thread'
+                                thread_id_pattern = r'^\*?\s+(\d+)\s+Thread'
+                                # retrieve gdb thread id (re.match returning None here would raise AttributeError)
+                                return re.match(thread_id_pattern, thread_str).group(1)
+                        # check if the task is running or not
+                        if task_state == gdb.parse_and_eval('uBaseTask::Running'):
+                                # find the equivalent thread from info thread
+                                # NOTE(review): this calls the function from inside its own loop -- confirm intent
+                                gdb_thread_id = find_matching_gdb_thread_id()
+                                if gdb_thread_id is None:
+                                        print('cannot find the thread id to switch to')
+                                        return
+                                # switch to that thread using the gdb 'thread' command
+                                gdb.execute('thread {}'.format(gdb_thread_id))
+        def switchto_id(self, tid, cluster):
+                """
+                @cluster: cluster object
+                @tid: int
+                """
+                threads = lookup_threads_by_cluster( cluster )
+                for t in threads:
+                        if t.tid == tid:
+                                self.switchto(t.value)
+                                return
+                print("Cound not find thread by id '{}'".format(tid))
+        def invoke(self, arg, from_tty):
+                """
+                @arg: str
+                @from_tty: bool
+                """
+                if not is_cforall():
+                        return
+                argv = parse(arg)
+                print(argv)
+                if argv[0].isdigit():
+                        cname = " ".join(argv[1:]) if len(argv) > 1 else None
+                        cluster = lookup_cluster(cname)
+                        if not cluster:
+                                print("Could not find cluster '{}'".format(cname if cname else "Main Cluster"))
+                                return
+                        try:
+                                tid = int(argv[0])
+                        except:
+                                print("'{}' not a valid thread id".format(argv[0]))
+                                print_usage(self)
+                                return
+                                # by id, userCluster
+                        self.switchto_id(tid, cluster)
+                elif argv[0].startswith('0x') or argv[0].startswith('0X'):
+                        self.switchto(argv[0]) # by address, any cluster
 ############
 class PrevThread(gdb.Command):
     """Switch back to previous task on the stack"""
     usage_msg = 'prevtask'
     def __init__(self):
         # command name 'prevtask', category gdb.COMMAND_USER
         super(PrevThread, self).__init__('prevtask', gdb.COMMAND_USER)
     def invoke(self, arg, from_tty):
         """
         Restore the registers saved in the newest entry of the global STACK
         @arg: str (not used)
         @from_tty: bool (not used)
         """
         global STACK
         if len(STACK) == 0:
             print('empty stack')
             return
         # must be at frame 0 to set pc register
         gdb.execute('select-frame 0')
         # take the most recently pushed sp, fp, pc off the global stack
         saved = STACK.pop()
         adjust_stack(get_addr(saved.pc), saved.fp, saved.sp)
         # must be at C++ frame to access C++ vars
         gdb.execute('frame 1')
+        """Switch back to previous task on the stack"""
+        usage_msg = 'prevtask'
+        def __init__(self):
+                """Initialise the base class with the command name 'prevtask' and the category gdb.COMMAND_USER."""
+                super(PrevThread, self).__init__('prevtask', gdb.COMMAND_USER)
+        def invoke(self, arg, from_tty):
+                """
+                @arg: str
+                @from_tty: bool
+                """
+                global STACK
+                if len(STACK) != 0:
+                        # must be at frame 0 to set pc register
+                        gdb.execute('select-frame 0')
+                        # pop stack
+                        stack_info = STACK.pop()
+                        pc = get_addr(stack_info.pc)
+                        sp = stack_info.sp
+                        fp = stack_info.fp
+                        # pop sp, fp, pc from global stack
+                        adjust_stack(pc, fp, sp)
+                        # must be at C++ frame to access C++ vars
+                        gdb.execute('frame 1')
+                else:
+                        print('empty stack')
 class ResetOriginFrame(gdb.Command):
     """Reset to the origin frame prior to continue execution again"""
     # NOTE(review): the command is registered below as 'reset' while this says
     # 'resetOriginFrame' -- confirm which name is intended
     usage_msg = 'resetOriginFrame'
     def __init__(self):
         # command name 'reset', category gdb.COMMAND_USER
         super(ResetOriginFrame, self).__init__('reset', gdb.COMMAND_USER)
     def invoke(self, arg, from_tty):
         """
         Restore the registers from the oldest entry of the global STACK and empty it
         @arg: str (not used)
         @from_tty: bool (not used)
         """
         global STACK
         if len(STACK) == 0:
             # nothing was saved; deliberately silent
             return
         # the first entry pushed holds the original sp, fp, pc; the rest is discarded
         origin = STACK.pop(0)
         STACK.clear()
         adjust_stack(get_addr(origin.pc), origin.fp, origin.sp)
         # must be at C++ frame to access C++ vars
         gdb.execute('frame 1')
+        """Reset to the origin frame prior to continue execution again"""
+        usage_msg = 'resetOriginFrame'
+        def __init__(self):
+                """Initialise the base class with the command name 'reset' and the category gdb.COMMAND_USER."""
+                super(ResetOriginFrame, self).__init__('reset', gdb.COMMAND_USER)
+        def invoke(self, arg, from_tty):
+                """
+                @arg: str
+                @from_tty: bool
+                """
+                global STACK
+                if len(STACK) != 0:
+                        stack_info = STACK.pop(0)
+                        STACK.clear()
+                        pc = get_addr(stack_info.pc)
+                        sp = stack_info.sp
+                        fp = stack_info.fp
+                        # pop sp, fp, pc from global stack
+                        adjust_stack(pc, fp, sp)
+                        # must be at C++ frame to access C++ vars
+                        gdb.execute('frame 1')
+                #else:
+                        #print('reset: empty stack') #probably does not have to print msg
 Clusters()
 …
 PrevThread()
 Threads()
+Thread()
 # Local Variables: #

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changes in / [6091b88a:d45ed83]

Legend:

Download in other formats: