Changeset 7a80113
- Timestamp: Sep 22, 2020, 11:29:12 AM (2 years ago)
- Branches: arm-eh, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 0a945fd
- Parents: 1c507eb (diff), 08f3ad3 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- Files:
- 18 added
- 57 edited
- 1 moved
Jenkinsfile
r1c507eb r7a80113 102 102 103 103 echo GitLogMessage() 104 105 // This is a complete hack but it solves problems with automake thinking it needs to regenerate makefiles106 // We fudged automake/missing to handle that but automake stills bakes prints inside the makefiles107 // and these cause more problems.108 sh 'find . -name Makefile.in -exec touch {} +'109 104 } 110 105 } … … 465 460 description: 'Which compiler to use', \ 466 461 name: 'Compiler', \ 467 choices: 'gcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang', 462 choices: 'gcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang', \ 468 463 defaultValue: 'gcc-8', \ 469 464 ], \ -
benchmark/io/http/filecache.cfa
r1c507eb r7a80113 73 73 cache_line * entries; 74 74 size_t size; 75 int * rawfds; 76 int nfds; 75 77 } file_cache; 76 78 … … 98 100 } 99 101 100 int put_file( cache_line & entry ) {102 int put_file( cache_line & entry, int fd ) { 101 103 uint32_t idx = murmur3_32( (const uint8_t *)entry.file, strlen(entry.file), options.file_cache.hash_seed ) % file_cache.size; 102 104 … … 108 110 109 111 file_cache.entries[idx] = entry; 112 file_cache.entries[idx].fd = fd; 110 113 return i > 0 ? 1 : 0; 111 114 } … … 121 124 size_t fcount = 0; 122 125 size_t fsize = 16; 123 cache_line * raw = 0p; 124 raw = alloc(raw, fsize, true); 126 cache_line * raw = alloc(fsize); 125 127 // Step 1 get a dense array of all files 126 128 int walk(const char *fpath, const struct stat *sb, int typeflag) { … … 131 133 if(fcount > fsize) { 132 134 fsize *= 2; 133 raw = alloc( raw, fsize, true);135 raw = alloc(fsize, raw`realloc); 134 136 } 135 137 … … 162 164 file_cache.entries = anew(file_cache.size); 163 165 166 if(options.file_cache.fixed_fds) { 167 file_cache.nfds = fcount; 168 file_cache.rawfds = alloc(fcount); 169 } 170 164 171 // Step 3 fill the cache 165 172 int conflicts = 0; 166 173 for(i; fcount) { 167 conflicts += put_file( raw[i] ); 174 int fd; 175 if(options.file_cache.fixed_fds) { 176 file_cache.rawfds[i] = raw[i].fd; 177 fd = i; 178 } 179 else { 180 fd = raw[i].fd; 181 } 182 conflicts += put_file( raw[i], fd ); 168 183 } 169 184 printf("Filled cache from path \"%s\" with %zu files\n", path, fcount); … … 197 212 } 198 213 199 return [aalloc(extra), 0]; 214 size_t s = file_cache.nfds + extra; 215 int * data = alloc(s, file_cache.rawfds`realloc); 216 return [data, file_cache.nfds]; 200 217 } 201 218 -
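The fixed-fds path added above replaces each cache entry's raw descriptor with an index into a dense rawfds array, so later I/O submissions can refer to files by registered index rather than by kernel fd. A minimal plain-C sketch of the same bookkeeping (types and names hypothetical, not the actual CFA code):

    #include <stdlib.h>

    /* Miniature of the cache: with fixed fds, the real descriptors live in
       rawfds and each entry keeps only its index into that array. */
    struct cache_entry { const char * file; int fd; };
    struct file_cache {
        struct cache_entry * entries;
        size_t size;
        int * rawfds;   /* real descriptors, registered with the kernel once */
        int   nfds;
    };

    static void fill_fixed(struct file_cache * fc, struct cache_entry * raw, size_t count) {
        fc->nfds   = (int)count;
        fc->rawfds = malloc(count * sizeof(int));
        for (size_t i = 0; i < count; i++) {
            fc->rawfds[i] = raw[i].fd;  /* keep the real fd for registration */
            raw[i].fd     = (int)i;     /* the cache now refers to this file by index */
            /* put_file(raw[i], raw[i].fd) would hash raw[i].file into entries here */
        }
    }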
benchmark/io/http/main.cfa
r1c507eb r7a80113 12 12 #include <kernel.hfa> 13 13 #include <stats.hfa> 14 #include <time.hfa> 14 15 #include <thread.hfa> 15 16 16 #include "channel.hfa"17 17 #include "filecache.hfa" 18 18 #include "options.hfa" 19 19 #include "worker.hfa" 20 20 21 extern void register_fixed_files( cluster &, int *, unsigned count ); 22 23 Duration default_preemption() { 24 return 0; 25 } 26 21 27 //============================================================================================= 22 28 // Globals 23 29 //============================================================================================= 24 channel & wait_connect;25 26 30 struct ServerProc { 27 31 processor self; … … 84 88 // Run Server Cluster 85 89 { 86 cluster cl = { "Server Cluster", options.clopts. flags };90 cluster cl = { "Server Cluster", options.clopts.params }; 87 91 #if !defined(__CFA_NO_STATISTICS__) 88 92 print_stats_at_exit( cl, CFA_STATS_READY_Q | CFA_STATS_IO ); 89 93 #endif 90 94 options.clopts.instance = &cl; 91 92 channel chan = { options.clopts.chan_size };93 &wait_connect = &chan;94 95 95 96 int pipe_cnt = options.clopts.nworkers * 2; … … 102 103 } 103 104 105 if(options.file_cache.fixed_fds) { 106 register_fixed_files(cl, fds, pipe_off); 107 } 108 104 109 { 105 110 ServerProc procs[options.clopts.nprocs]; … … 107 112 Worker workers[options.clopts.nworkers]; 108 113 for(i; options.clopts.nworkers) { 109 if( options.file_cache.fixed_fds ) { 110 workers[i].pipe[0] = pipe_off + (i * 2) + 0; 111 workers[i].pipe[1] = pipe_off + (i * 2) + 1; 112 } 113 else { 114 // if( options.file_cache.fixed_fds ) { 115 // workers[i].pipe[0] = pipe_off + (i * 2) + 0; 116 // workers[i].pipe[1] = pipe_off + (i * 2) + 1; 117 // } 118 // else 119 { 114 120 workers[i].pipe[0] = fds[pipe_off + (i * 2) + 0]; 115 121 workers[i].pipe[1] = fds[pipe_off + (i * 2) + 1]; 122 workers[i].sockfd = server_fd; 123 workers[i].addr = (struct sockaddr *)&address; 124 workers[i].addrlen = (socklen_t*)&addrlen; 125 workers[i].flags = 0; 116 126 } 117 127 unpark( workers[i] __cfaabi_dbg_ctx2 ); … … 119 129 printf("%d workers started on %d processors\n", options.clopts.nworkers, options.clopts.nprocs); 120 130 { 121 Acceptor acceptor = { server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen, 0 };122 123 131 char buffer[128]; 124 132 while(!feof(stdin)) { … … 127 135 128 136 printf("Shutting Down\n"); 129 }130 printf("Acceptor Closed\n");131 132 // Clean-up the workers133 for(options.clopts.nworkers) {134 put( wait_connect, -1 );135 137 } 136 138 } -
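The new register_fixed_files(cl, fds, pipe_off) call presumably registers the cache files and worker pipes with the cluster's io_uring instance so submissions can use the IOSQE_FIXED_FILE flag. Outside the CFA runtime, the equivalent step with liburing looks roughly like this (a sketch, not the runtime's actual implementation):

    #include <liburing.h>

    /* Register an array of descriptors with the ring; subsequent SQEs can then
       reference a file by its index in this array via IOSQE_FIXED_FILE. */
    static int register_files(struct io_uring * ring, const int * fds, unsigned count) {
        return io_uring_register_files(ring, fds, count); /* 0 on success, -errno on error */
    }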
benchmark/io/http/options.cfa
r1c507eb r7a80113 31 31 1, // nworkers; 32 32 0, // flags; 33 10, // chan_size;34 33 false, // procstats 35 34 false, // viewhalts … … 39 38 40 39 const char * parse_options( int argc, char * argv[] ) { 41 bool uthrdpo = false;42 40 bool subthrd = false; 43 41 bool eagrsub = false; … … 52 50 {'t', "threads", "Number of worker threads to use", options.clopts.nworkers}, 53 51 {'b', "accept-backlog", "Maximum number of pending accepts", options.socket.backlog}, 54 {'B', "channel-size", "Maximum number of accepted connection pending", options.clopts.chan_size},55 52 {'r', "request_len", "Maximum number of bytes in the http request, requests with more data will be answered with Http Code 414", options.socket.buflen}, 56 53 {'S', "seed", "seed to use for hashing", options.file_cache.hash_seed }, 57 54 {'C', "cache-size", "Size of the cache to use, if set to small, will uses closes power of 2", options.file_cache.size }, 58 55 {'l', "list-files", "List the files in the specified path and exit", options.file_cache.list, parse_settrue }, 59 {'u', "userthread", "If set, cluster uses user-thread to poll I/O", uthrdpo, parse_settrue },60 56 {'s', "submitthread", "If set, cluster uses polling thread to submit I/O", subthrd, parse_settrue }, 61 57 {'e', "eagersubmit", "If set, cluster submits I/O eagerly but still aggregates submits", eagrsub, parse_settrue}, … … 71 67 parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]... [PATH]\ncforall http server", left ); 72 68 73 if( uthrdpo ) { 74 options.clopts.flags |= CFA_CLUSTER_IO_POLLER_USER_THREAD; 75 } 76 77 if( subthrd ) { 78 options.clopts.flags |= CFA_CLUSTER_IO_POLLER_THREAD_SUBMITS; 79 } 80 81 if( eagrsub ) { 82 options.clopts.flags |= CFA_CLUSTER_IO_EAGER_SUBMITS; 83 } 69 options.clopts.params.poller_submits = subthrd; 70 options.clopts.params.eager_submits = eagrsub; 84 71 85 72 if( fixedfd ) { … … 88 75 89 76 if( sqkpoll ) { 90 options.clopts. flags |= CFA_CLUSTER_IO_KERNEL_POLL_SUBMITS;77 options.clopts.params.poll_submit = true; 91 78 options.file_cache.fixed_fds = true; 92 79 } 93 80 94 81 if( iokpoll ) { 95 options.clopts. flags |= CFA_CLUSTER_IO_KERNEL_POLL_COMPLETES;82 options.clopts.params.poll_complete = true; 96 83 options.file_cache.open_flags |= O_DIRECT; 97 84 } 98 85 99 options.clopts. flags |= (sublen << CFA_CLUSTER_IO_BUFFLEN_OFFSET);86 options.clopts.params.num_ready = sublen; 100 87 101 88 if( left[0] == 0p ) { return "."; } -
benchmark/io/http/options.hfa
r1c507eb r7a80113 2 2 3 3 #include <stdint.h> 4 5 #include <kernel.hfa> 4 6 5 7 struct cluster; … … 23 25 int nprocs; 24 26 int nworkers; 25 int flags; 26 int chan_size; 27 io_context_params params; 27 28 bool procstats; 28 29 bool viewhalts; -
benchmark/io/http/protocol.cfa
r1c507eb r7a80113 11 11 extern "C" { 12 12 int snprintf ( char * s, size_t n, const char * format, ... ); 13 #include <linux/io_uring.h> 13 14 } 14 15 #include <string.h> 15 16 16 #include <errno.h> 17 17 18 #include "options.hfa" 18 19 19 20 const char * http_msgs[] = { … … 74 75 READ: 75 76 for() { 76 int ret = cfa_read(fd, it, count);77 if(ret == 0 ) return [OK200, true, 0 p, 0];77 int ret = cfa_read(fd, (void*)it, count, 0, -1`s, 0p, 0p); 78 if(ret == 0 ) return [OK200, true, 0, 0]; 78 79 if(ret < 0 ) { 79 80 if( errno == EAGAIN || errno == EWOULDBLOCK) continue READ; … … 88 89 count -= ret; 89 90 90 if( count < 1 ) return [E414, false, 0 p, 0];91 if( count < 1 ) return [E414, false, 0, 0]; 91 92 } 92 93 … … 95 96 it = buffer; 96 97 int ret = memcmp(it, "GET /", 5); 97 if( ret != 0 ) return [E400, false, 0 p, 0];98 if( ret != 0 ) return [E400, false, 0, 0]; 98 99 it += 5; 99 100 … … 106 107 ssize_t ret; 107 108 SPLICE1: while(count > 0) { 108 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, SPLICE_F_MOVE | SPLICE_F_MORE );109 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p); 109 110 if( ret < 0 ) { 110 111 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE1; … … 116 117 size_t in_pipe = ret; 117 118 SPLICE2: while(in_pipe > 0) { 118 ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, SPLICE_F_MOVE | SPLICE_F_MORE );119 ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p); 119 120 if( ret < 0 ) { 120 121 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE2; -
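The updated cfa_read and cfa_splice calls take extra arguments (a timeout and completion hooks), but the underlying zero-copy send path is the standard Linux pattern: splice the file into a pipe, then splice the pipe into the socket. A plain-C sketch of that pattern using splice(2) directly (assumptions: blocking descriptors, no cancellation):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>

    /* Send 'count' bytes of 'file_fd' starting at 'offset' to 'sock_fd'
       without copying through user space: file -> pipe, then pipe -> socket. */
    static int splice_file_to_socket(int file_fd, loff_t offset, size_t count,
                                     int pipe_fd[2], int sock_fd) {
        while (count > 0) {
            ssize_t in = splice(file_fd, &offset, pipe_fd[1], 0, count,
                                SPLICE_F_MOVE | SPLICE_F_MORE);
            if (in < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) continue; return -1; }
            if (in == 0) break;                  /* unexpected end of file */
            count -= (size_t)in;
            while (in > 0) {
                ssize_t out = splice(pipe_fd[0], 0, sock_fd, 0, (size_t)in,
                                     SPLICE_F_MOVE | SPLICE_F_MORE);
                if (out < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) continue; return -1; }
                in -= out;
            }
        }
        return 0;
    }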
benchmark/io/http/worker.cfa
r1c507eb r7a80113 28 28 CONNECTION: 29 29 for() { 30 int fd = take(wait_connect); 31 if (fd < 0) break; 30 int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, 0p, 0p ); 31 if(fd < 0) { 32 if( errno == ECONNABORTED ) break; 33 abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) ); 34 } 32 35 33 36 printf("New connection %d, waiting for requests\n", fd); … … 82 85 } 83 86 } 84 85 //=============================================================================================86 // Acceptor Thread87 //=============================================================================================88 void ?{}( Acceptor & this, int sockfd, struct sockaddr * addr, socklen_t * addrlen, int flags ) {89 ((thread&)this){ "Acceptor Thread", *options.clopts.instance };90 this.sockfd = sockfd;91 this.addr = addr;92 this.addrlen = addrlen;93 this.flags = flags;94 }95 96 void main( Acceptor & this ) {97 for() {98 int ret = cfa_accept4( this.[sockfd, addr, addrlen, flags] );99 if(ret < 0) {100 if( errno == ECONNABORTED ) break;101 abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) );102 }103 104 printf("New connection accepted\n");105 put( wait_connect, ret );106 }107 } -
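With the Acceptor thread and connection channel removed, each worker now accepts on the shared listening socket itself; cfa_accept4 appears to wrap the kernel call with a timeout and cancellation context. The plain-C shape of the same loop, for reference (a sketch only; handle_connection is hypothetical):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    static void accept_loop(int sockfd, struct sockaddr * addr, socklen_t * addrlen) {
        for (;;) {
            int fd = accept4(sockfd, addr, addrlen, 0);
            if (fd < 0) {
                if (errno == ECONNABORTED) break;   /* listener shut down: exit cleanly */
                fprintf(stderr, "accept error: (%d) %s\n", errno, strerror(errno));
                break;
            }
            /* handle_connection(fd) would run the HTTP request/response loop here */
        }
    }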
benchmark/io/http/worker.hfa
r1c507eb r7a80113 7 7 } 8 8 9 #include "channel.hfa"10 11 extern channel & wait_connect;12 13 9 //============================================================================================= 14 10 // Worker Thread … … 17 13 thread Worker { 18 14 int pipe[2]; 19 };20 void ?{}( Worker & this );21 void main( Worker & );22 23 //=============================================================================================24 // Acceptor Thread25 //=============================================================================================26 thread Acceptor {27 15 int sockfd; 28 16 struct sockaddr * addr; … … 30 18 int flags; 31 19 }; 32 33 void ?{}( Acceptor & this, int sockfd, struct sockaddr * addr, socklen_t * addrlen, int flags ); 34 void main( Acceptor & this ); 20 void ?{}( Worker & this); 21 void main( Worker & ); -
doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex
r1c507eb r7a80113 60 60 \section{Introduction} 61 61 \subsection{\CFA and the \CFA concurrency package} 62 \CFA \cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.62 \CFA~\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language. 63 63 It aims to add high-productivity features while maintaining the predictable performance of C. 64 As such, concurrency in \CFA \cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.65 \CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in orderto achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programming.64 As such, concurrency in \CFA~\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code. 65 \CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programming. 66 66 As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}. 67 67 68 \subsection{Scheduling} 68 69 \newterm{Scheduling} occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler. 69 This scheduling is an indirect handoff, as opposed to generators and coroutines whichexplicitly switch to the next generator and coroutine respectively.70 This scheduling is an indirect handoff, as opposed to generators and coroutines that explicitly switch to the next generator and coroutine respectively. 70 71 The cost of switching between two threads for an indirect handoff has two components: 71 72 \begin{enumerate} … … 75 76 and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run. 76 77 \end{enumerate} 77 The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a third fixed thread before scheduling.}, while the scheduling cost can vary based on the system state.78 Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning , however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost nowalso depends on contention.78 The first cost is generally constant\footnote{Affecting the constant context-switch cost is whether it is done in one step, where the first thread schedules the second, or in two steps, where the first thread context switches to a third scheduler thread.}, while the scheduling cost can vary based on the system state. 79 Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost also depends on contention. 
79 80 The more threads switch, the more the administration cost of scheduling becomes noticeable. 80 81 It is therefore important to build a scheduler with the lowest possible cost and latency. 81 82 Another important consideration is \newterm{fairness}. 82 83 In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}. 84 In practice, there can be advantages to unfair scheduling, similar to the express cash register at a grocery store. 83 85 While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness. 84 86 Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance. 85 In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the express cash register at a grocery store. 86 87 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance.87 88 \subsection{Research Goal} 89 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good general performance. 88 90 Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package. 89 Therefore, the main goal of this proposal is :91 Therefore, the main consequence of this goal is : 90 92 \begin{quote} 91 93 The \CFA scheduler should be \emph{viable} for \emph{any} workload. 92 94 \end{quote} 93 95 94 For a general-purpose scheduler, it is impossible to produce an optimal algorithm as it would requireknowledge of the future behaviour of threads.95 As such, scheduling performance is generally either defined by the best-case scenario, \ie a workload to which the scheduler is tailored, or theworst-case scenario, \ie the scheduler behaves no worse than \emph{X}.96 For a general-purpose scheduler, it is impossible to produce an optimal algorithm as that requires knowledge of the future behaviour of threads. 97 As such, scheduling performance is generally either defined by a best-case scenario, \ie a workload to which the scheduler is tailored, or a worst-case scenario, \ie the scheduler behaves no worse than \emph{X}. 96 98 For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance. 97 99 Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler. … … 103 105 \item creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily, 104 106 \item scheduling blocking I/O operations, 105 \item and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.107 \item and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs in the default scheduler or replacing the default scheduler. 106 108 \end{enumerate} 107 109 … … 119 121 \paragraph{Performance} The performance of a scheduler can generally be measured in terms of scheduling cost, scalability and latency. 120 122 \newterm{Scheduling cost} is the cost to switch from one thread to another, as mentioned above. 
121 For simple applications, where a single kernel thread does most of the scheduling, it is generally the dominating cost. 122 \newterm{Scalability} is the cost of adding multiple kernel threads because it increases the time for context switching because of contention by multiple threads accessing shared resources, \eg the ready queue. 123 For compute-bound concurrent applications with little context switching, the scheduling cost is negligible. 124 For applications with high context-switch rates, scheduling cost can begin to dominating the cost. 125 \newterm{Scalability} is the cost of adding multiple kernel threads. 126 It can increase the time for scheduling because of contention from the multiple threads accessing shared resources, \eg a single ready queue. 123 127 Finally, \newterm{tail latency} is service delay and relates to thread fairness. 124 Specifically, latency measures how long a thread waits to run once scheduled and is evaluated inthe worst case.128 Specifically, latency measures how long a thread waits to run once scheduled and is evaluated by the worst case. 125 129 The \CFA scheduler should offer good performance for all three metrics. 126 130 … … 128 132 \newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie prevent starvation. 129 133 As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above-mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about. 130 \newterm{Predictability} and \newterm{reliability} mean similar workloads achieve similar performance andprogrammer execution intuition is respected.134 \newterm{Predictability} and \newterm{reliability} mean similar workloads achieve similar performance so programmer execution intuition is respected. 131 135 For example, a thread that yields aggressively should not run more often than other threads. 132 136 While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers. 133 The \CFA scheduler must guarantee eventual progress and should be predictableand offer reliable performance.137 The \CFA scheduler must guarantee eventual progress, should be predictable, and offer reliable performance. 134 138 135 139 \paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement and is discussed in depth towards the end of the proposal. 136 \newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run , and conversely, use all CPUs availablewhen the workload can benefit from it.140 \newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run (conserve energy/heat), and conversely, using as many available CPU cycles when the workload can benefit from it. 137 141 Balancing these two states is where the complexity lies. 138 142 The \CFA scheduler should be efficient with respect to the underlying (shared) computer. … … 146 150 \begin{enumerate} 147 151 \item Threads live long enough for useful feedback information to be gathered. 148 \item Threads belong to multiple users so fairness across threads is insufficient.152 \item Threads belong to multiple users so fairness across users is largely invisible. 149 153 \end{enumerate} 150 154 … … 158 162 Security concerns mean more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process. 
159 163 In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user. 160 Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent. 161 This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready queue. 164 Fairness across threads is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent. 165 This approach allows for a much simpler fairness metric, and in this proposal, \emph{fairness} is defined as: 166 \begin{quote} 167 When multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready queue. 168 \end{quote} 162 169 163 170 Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback. … … 169 176 Threads with equal priority are scheduled using a secondary strategy, often something simple like round robin or FIFO. 170 177 A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run. 171 Th is possible starving of threads can dramatically increaseprogramming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.178 The potential for thread starvation dramatically increases programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems. 172 179 173 180 An important observation is that threads do not need to have explicit priorities for problems to occur. 174 Indeed, any system with multiple ready queues that attempts to exhaust one queue before accessing the other queues, essentially provide implicit priority, which can encounter starvation problems.181 Indeed, any system with multiple ready queues that attempts to exhaust one queue before accessing the other queues, essentially provides implicit priority, which can encounter starvation problems. 175 182 For example, a popular scheduling strategy that suffers from implicit priorities is work stealing. 176 183 \newterm{Work stealing} is generally presented as follows: … … 180 187 \item If a processor's ready queue is empty, attempt to run threads from some other processor's ready queue. 181 188 \end{enumerate} 182 183 189 In a loaded system\footnote{A \newterm{loaded system} is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block, or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list. 
184 190 185 Since priorities can be complex for programmers to incorporate into their execution intuition, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit threadpriorities.191 Since priorities can be complex for programmers to incorporate into their execution intuition, the \CFA scheduling strategy does not provided explicit priorities and attempts to eliminate implicit priorities. 186 192 187 193 \subsection{Schedulers without feedback or priorities} 188 194 This proposal conjectures that it is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about. 189 The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first runfirst.195 The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first come first. 190 196 However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization. 191 197 Thankfully, strict FIFO is not needed for sufficient fairness. 192 198 Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run. 193 Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a specific thread will \emph{eventually} run.199 Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a thread \emph{eventually} runs. 194 200 Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems. 195 201 For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering. 196 202 197 203 The \CFA scheduler fairness is defined as follows: 198 \begin{ itemize}199 \itemGiven two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$.200 \end{ itemize}204 \begin{quote} 205 Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$. 206 \end{quote} 201 207 While this is not a bounded guarantee, the probability that unfairness persist for long periods of times decreases exponentially, making persisting unfairness virtually impossible. 202 208 … … 210 216 The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant. 211 217 Section~\ref{sec:resize} discusses resizing the array.}. 212 Pushing new data is done by selecting one of the se underlying queues at random, recording a timestamp for the operationand pushing to the selected queue.218 Pushing new data is done by selecting one of the underlying queues at random, recording a timestamp for the operation, and pushing to the selected queue. 213 219 Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp. 
214 A higher number of underlying queues lead to less contention on each queue and therefore better performance.220 A higher number of underlying queues leads to less contention on each queue and therefore better performance. 215 221 In a loaded system, it is highly likely the queues are non-empty, \ie several threads are on each of the underlying queues. 216 This means thatselecting a queue at random to pop from is highly likely to yield a queue with available items.222 For this case, selecting a queue at random to pop from is highly likely to yield a queue with available items. 217 223 In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10. 218 224 … … 221 227 \input{base.pstex_t} 222 228 \end{center} 223 \caption{ Relaxed FIFO list at the base of the scheduler:an array of strictly FIFO lists.224 The timestamp is in all nodes and cell arrays.}229 \caption{Loaded relaxed FIFO list base on an array of strictly FIFO lists. 230 A timestamp appears in each node and array cell.} 225 231 \label{fig:base} 226 232 \end{figure} … … 230 236 \input{empty.pstex_t} 231 237 \end{center} 232 \caption{ ``More empty'' state of the queue:the array contains many empty cells.}238 \caption{Unloaded relaxed FIFO list where the array contains many empty cells.} 233 239 \label{fig:empty} 234 240 \end{figure} 235 241 236 When the ready queue is \emph{more empty}, \ie several of the queues are empty,selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation.242 In an unloaded system, several of the queues are empty, so selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation. 237 243 Figure~\ref{fig:empty} shows an example with fewer elements, where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time. 238 244 Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry. … … 262 268 \end{table} 263 269 264 Performance can be improved in case~D (Table~\ref{tab:perfcases})by adding information to help processors find which inner queues are used.270 Performance can be improved in Table~\ref{tab:perfcases} case~D by adding information to help processors find which inner queues are used. 265 271 This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations. 266 272 The approach used to encode this information can vary in density and be either global or local. … … 273 279 With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomically. 274 280 275 Finally, a dense bitmap, either single or multi-word, causes additional problems in case C (Table 1), because many processors are continuously scanning the bitmask to find the few available threads.281 Finally, a dense bitmap, either single or multi-word, causes additional problems in Table~\ref{tab:perfcases} case C, because many processors are continuously scanning the bitmask to find the few available threads. 
276 282 This increased contention on the bitmask(s) reduces performance because of cache misses after updates and the bitmask is updated more frequently by the scanning processors racing to read and/or update that information. 277 283 This increased update frequency means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available user threads but none on queue. … … 279 285 \begin{figure} 280 286 \begin{center} 281 {\resizebox{0.8\textwidth}{!}{\input{emptybit}}} 282 \end{center} 283 \caption{``More empty'' queue with added bitmask to indicate which array cells have items.} 287 {\resizebox{0.73\textwidth}{!}{\input{emptybit}}} 288 \end{center} 289 \vspace*{-5pt} 290 \caption{Unloaded queue with added bitmask to indicate which array cells have items.} 284 291 \label{fig:emptybit} 292 \begin{center} 293 {\resizebox{0.73\textwidth}{!}{\input{emptytree}}} 294 \end{center} 295 \vspace*{-5pt} 296 \caption{Unloaded queue with added binary search tree indicate which array cells have items.} 297 \label{fig:emptytree} 298 \begin{center} 299 {\resizebox{0.9\textwidth}{!}{\input{emptytls}}} 300 \end{center} 301 \vspace*{-5pt} 302 \caption{Unloaded queue with added per processor bitmask to indicate which array cells have items.} 303 \label{fig:emptytls} 285 304 \end{figure} 286 305 287 Figure~\ref{fig:emptytree} shows an otherapproach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US.306 Figure~\ref{fig:emptytree} shows an approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US. 288 307 How does that affect \CFA? Can I use it in my work?}. 289 However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case. 290 291 \begin{figure} 292 \begin{center} 293 {\resizebox{0.8\textwidth}{!}{\input{emptytree}}} 294 \end{center} 295 \caption{``More empty'' queue with added binary search tree indicate which array cells have items.} 296 \label{fig:emptytree} 297 \end{figure} 298 299 Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independent copy of it. 308 However, this approach may lead to poorer performance in Table~\ref{tab:perfcases} case~B due to the inherent pointer chasing cost and already low contention cost in that case. 309 310 Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but have each thread keep its own independent copy of it. 300 311 While this approach can offer good scalability \emph{and} low latency, the liveliness of the information can become a problem. 301 In the simple cases, local copies of which underlying queues are emptycan become stale and end-up not being useful for the pop operation.312 In the simple cases, local copies with empty underlying queues can become stale and end-up not being useful for the pop operation. 302 313 A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct. 
303 314 As mentioned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation. 304 315 Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct. 305 316 306 \begin{figure}307 \begin{center}308 \input{emptytls}309 \end{center}310 \caption{``More empty'' queue with added per processor bitmask to indicate which array cells have items.}311 \label{fig:emptytls}312 \end{figure}313 314 317 There is a fundamental tradeoff among these approach. 315 Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case.316 Sparse global information helps high-contention cases but increases latency in zero-contention -cases,to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.318 Dense global information about empty underlying queues helps zero-contention cases at the cost of the high-contention case. 319 Sparse global information helps high-contention cases but increases latency in zero-contention cases to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node. 317 320 Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}. 318 Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases. However the information can become stale making it difficult to use to ensure correctness. 321 Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases. 322 However, the information can become stale making it difficult to use to ensure correctness. 319 323 The fact that these solutions have these fundamental limits suggest to me a better solution that attempts to combine these properties in an interesting way. 320 324 Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful. … … 323 327 324 328 How much scalability is actually needed is highly debatable. 325 \emph{libfibre} \cite{libfibre} has compared favourably to other schedulers in webserver tests\cite{Karsten20} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.329 \emph{libfibre}~\cite{libfibre} has compared favourably to other schedulers in webserver tests~\cite{Karsten20} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask. 326 330 As such, the single atomic instruction on a shared cacheline may be sufficiently performant. 
327 331 328 I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single intrepresenting a thread and intrusive data fields.329 Using this prototype, I ran preliminary performance experiments thatconfirm the expected performance in Table~\ref{tab:perfcases}.330 However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, \eg threads are not independent of each other,when a thread blocks some other thread must intervene to wake it.332 I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single $int$ representing a thread and intrusive data fields. 333 Using this prototype, preliminary performance experiments confirm the expected performance in Table~\ref{tab:perfcases}. 334 However, these experiments only offer a hint at the actual performance of the scheduler since threads are involved in more complex operations, \eg threads are not independent of each other: when a thread blocks some other thread must intervene to wake it. 331 335 332 336 I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results, as creating one-to-one comparisons between the prototype and the \CFA runtime will be complex. … … 345 349 Threads on a cluster are always scheduled on one of the processors of the cluster. 346 350 Currently, the runtime handles dynamically adding and removing processors from clusters at any time. 347 Since this is part of the existing design, the proposed scheduler must also support this behaviour.351 Since this feature is part of the existing design, the proposed scheduler must also support this behaviour. 348 352 However, dynamically resizing a cluster is considered a rare event associated with setup, tear down and major configuration changes. 349 353 This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system. 350 354 As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster. 351 How long adding or removing processors takeand how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of times.355 That is, the time to add or remove processors and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of times. 352 356 However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance. 353 357 The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks. … … 371 375 372 376 There are possible alternatives to the reader-writer lock solution. 373 This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject \cite{michael2004hazard, brown2015reclaiming}.377 This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject~\cite{brown2015reclaiming, michael2004hazard}. 374 378 However, the reader-write lock-solution is simple and can be leveraged to solve other problems (\eg processor ordering and memory reclamation of threads), which makes it an attractive solution. 
375 379 … … 401 405 Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable). 402 406 However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster. 403 In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} write \CFA code that leads to a deadlock.}.407 In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} writes \CFA code that leads to a deadlock.}. 404 408 Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed. 405 409 For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock. 406 To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block or code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor. 410 To be safe, this process must include a ``handshake'' where it is guaranteed that either: 411 \begin{enumerate} 412 \item 413 the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block or 414 \item 415 code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor. 416 \end{enumerate} 407 417 This matter is complicated by the fact that pthreads and Linux offer few tools to implement this solution and no guarantee of ordering of threads waking up for most of these tools. 408 418 409 419 Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost. 410 This scenario happens when a program oscillates between high and low activity, needing most and then few erprocessors.420 This scenario happens when a program oscillates between high and low activity, needing most and then few processors. 411 421 A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up. 412 422 This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer. … … 417 427 Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to suboptimal throughput. 418 428 Furthermore, transitions from sleeping to awake and vice versa also add unnecessary latency. 419 There is already a wealth of research on the subject \cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{Karsten20}.429 There is already a wealth of research on the subject~\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg~\cite{Karsten20}. 
420 430 421 431 \subsection{Asynchronous I/O} … … 432 442 an event-engine to (de)multiplex the operations, 433 443 \item 434 and a synchronous interface for users to use.444 and a synchronous interface for users. 435 445 \end{enumerate} 436 446 None of these components currently exist in \CFA and I will need to build all three for this project. 437 447 438 \paragraph{OS A bstraction}439 One fundamental part for converting blocking I/O operations into non-blocking onesis having an underlying asynchronous I/O interface to direct the I/O operations.448 \paragraph{OS Asynchronous Abstraction} 449 One fundamental part for converting blocking I/O operations into non-blocking is having an underlying asynchronous I/O interface to direct the I/O operations. 440 450 While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API. 441 451 It is sufficient to make one work in the complex context of the \CFA runtime. 442 \uC uses the $select$ \cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.452 \uC uses the $select$~\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk. 443 453 $select$ entails significant complexity and is being replaced in UNIX operating systems, which make it a less interesting alternative. 444 Another popular interface is $epoll$ \cite{epoll}, which is supposed to be cheaper than $select$.445 However, $epoll$ also does not handle the file system and anecdotal evidence suggest it has problems with Linux pipes and $TTY$s.446 A popular cross-platform alternative is $libuv$ \cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).454 Another popular interface is $epoll$~\cite{epoll}, which is supposed to be cheaper than $select$. 455 However, $epoll$ also does not handle the file system and anecdotal evidence suggest it has problems with Linux pipes and ttys. 456 A popular cross-platform alternative is $libuv$~\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features). 447 457 However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together. 448 A very recent alternative that I am investigating is $io_uring$ \cite{io_uring}.458 A very recent alternative that I am investigating is $io_uring$~\cite{io_uring}. 449 459 It claims to address some of the issues with $epoll$ and my early investigating suggests that the claim is accurate. 450 $io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.451 I believe this approach allows for fewer problems, \eg the manpage for $open$ \cite{open} states:460 $io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to subsequently wait for changes on file descriptors or return an error. 
461 I believe this approach allows for fewer problems, \eg the manpage for $open$~\cite{open} states: 452 462 \begin{quote} 453 463 Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices; … … 455 465 Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behaviour when specifying this flag for regular files and block devices. 456 466 \end{quote} 457 This makes approach based on $epoll$/$select$ less reliable since they may not work for every file descriptors.458 For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime unless further work shows problems I haven't encountered yet.459 However, only a small subset of the features are available in Ubuntu as of April 2020 \cite{wiki:ubuntu-linux}, which will limit performance comparisons.467 This makes approaches based on $select$/$epoll$ less reliable since they may not work for every file descriptors. 468 For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime unless further work encounters a fatal problem. 469 However, only a small subset of the features are available in Ubuntu as of April 2020~\cite{wiki:ubuntu-linux}, which will limit performance comparisons. 460 470 I do not believe this will affect the comparison result. 461 471 462 472 \paragraph{Event Engine} 463 Laying on top of the asynchronous interface layeris the event engine.473 Above the OS asynchronous abstraction is the event engine. 464 474 This engine is responsible for multiplexing (batching) the synchronous I/O requests into asynchronous I/O requests and demultiplexing the results to appropriate blocked user threads. 465 475 This step can be straightforward for simple cases, but becomes quite complex when there are thousands of user threads performing both reads and writes, possibly on overlapping file descriptors. … … 478 488 The interface can be novel but it is preferable to match the existing POSIX interface when possible to be compatible with existing code. 479 489 Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effort. 480 Where new functionality is needed, I will create a novel interfaceto fill gaps and provide advanced features.490 Where new functionality is needed, I will add novel interface extensions to fill gaps and provide advanced features. 481 491 482 492 … … 485 495 \section{Discussion} 486 496 I believe that runtime system and scheduling are still open topics. 487 Many ``state of the art'' production frameworks still use single-threaded event loops because of performance considerations, \eg 497 Many ``state of the art'' production frameworks still use single-threaded event loops because of performance considerations, \eg~\cite{nginx-design}, and, to my knowledge, no widely available system language offers modern threading facilities. 488 498 I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible. 489 499 -
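The proposal text above describes the ready queue as an array of strictly FIFO sub-queues: a push records a timestamp and goes to one randomly chosen sub-queue, and a pop picks two sub-queues at random and dequeues from the one whose head is older. A minimal single-threaded plain-C sketch of that pop policy (data structures hypothetical, all locking and atomics omitted):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct node     { uint64_t ts; struct node * next; };   /* ts is set at push time */
    struct subqueue { struct node * head, * tail; };

    /* Pop: pick two sub-queues at random and dequeue from the one with the older
       head; retry when both picks are empty (caller guarantees the ready queue
       as a whole is non-empty). */
    static struct node * pop_two_random(struct subqueue * q, unsigned n) {
        for (;;) {
            unsigned a = (unsigned)rand() % n, b = (unsigned)rand() % n;
            struct subqueue * pick;
            if (!q[a].head && !q[b].head) continue;
            if      (!q[a].head) pick = &q[b];
            else if (!q[b].head) pick = &q[a];
            else pick = (q[a].head->ts <= q[b].head->ts) ? &q[a] : &q[b];
            struct node * it = pick->head;
            pick->head = it->next;
            if (!pick->head) pick->tail = NULL;
            return it;
        }
    }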
libcfa/configure.ac
r1c507eb r7a80113 166 166 AH_TEMPLATE([CFA_HAVE_IORING_OP_PROVIDE_BUFFERS],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_PROVIDE_BUFFERS.]) 167 167 AH_TEMPLATE([CFA_HAVE_IORING_OP_REMOVE_BUFFER],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_REMOVE_BUFFER.]) 168 AH_TEMPLATE([CFA_HAVE_IORING_OP_TEE],[Defined if io_uring support is present when compiling libcfathread and supports the operation IORING_OP_TEE.]) 168 169 AH_TEMPLATE([CFA_HAVE_IOSQE_FIXED_FILE],[Defined if io_uring support is present when compiling libcfathread and supports the flag FIXED_FILE.]) 169 170 AH_TEMPLATE([CFA_HAVE_IOSQE_IO_DRAIN],[Defined if io_uring support is present when compiling libcfathread and supports the flag IO_DRAIN.]) … … 173 174 AH_TEMPLATE([CFA_HAVE_SPLICE_F_FD_IN_FIXED],[Defined if io_uring support is present when compiling libcfathread and supports the flag SPLICE_F_FD_IN_FIXED.]) 174 175 AH_TEMPLATE([CFA_HAVE_IORING_SETUP_ATTACH_WQ],[Defined if io_uring support is present when compiling libcfathread and supports the flag IORING_SETUP_ATTACH_WQ.]) 175 AH_TEMPLATE([HAVE_PREADV2],[Defined if preadv2 support is present when compiling libcfathread.]) 176 AH_TEMPLATE([HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.]) 176 AH_TEMPLATE([CFA_HAVE_PREADV2],[Defined if preadv2 support is present when compiling libcfathread.]) 177 AH_TEMPLATE([CFA_HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.]) 178 AH_TEMPLATE([CFA_HAVE_PWRITEV2],[Defined if pwritev2 support is present when compiling libcfathread.]) 179 AH_TEMPLATE([CFA_HAVE_STATX],[Defined if statx support is present when compiling libcfathread.]) 180 AH_TEMPLATE([CFA_HAVE_OPENAT2],[Defined if openat2 support is present when compiling libcfathread.]) 177 181 AH_TEMPLATE([__CFA_NO_STATISTICS__],[Defined if libcfathread was compiled without support for statistics.]) 178 182 179 define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER ])183 define(ioring_ops, [IORING_OP_NOP,IORING_OP_READV,IORING_OP_WRITEV,IORING_OP_FSYNC,IORING_OP_READ_FIXED,IORING_OP_WRITE_FIXED,IORING_OP_POLL_ADD,IORING_OP_POLL_REMOVE,IORING_OP_SYNC_FILE_RANGE,IORING_OP_SENDMSG,IORING_OP_RECVMSG,IORING_OP_TIMEOUT,IORING_OP_TIMEOUT_REMOVE,IORING_OP_ACCEPT,IORING_OP_ASYNC_CANCEL,IORING_OP_LINK_TIMEOUT,IORING_OP_CONNECT,IORING_OP_FALLOCATE,IORING_OP_OPENAT,IORING_OP_CLOSE,IORING_OP_FILES_UPDATE,IORING_OP_STATX,IORING_OP_READ,IORING_OP_WRITE,IORING_OP_FADVISE,IORING_OP_MADVISE,IORING_OP_SEND,IORING_OP_RECV,IORING_OP_OPENAT2,IORING_OP_EPOLL_CTL,IORING_OP_SPLICE,IORING_OP_PROVIDE_BUFFERS,IORING_OP_REMOVE_BUFFER,IORING_OP_TEE]) 180 184 define(ioring_flags, [IOSQE_FIXED_FILE,IOSQE_IO_DRAIN,IOSQE_ASYNC,IOSQE_IO_LINK,IOSQE_IO_HARDLINK,SPLICE_F_FD_IN_FIXED,IORING_SETUP_ATTACH_WQ]) 181 185 … … 222 226 ]) 223 227 ]) 224 
AC_CHECK_FUNCS([preadv2 pwritev2]) 228 AC_CHECK_FUNC([preadv2], [AC_DEFINE([CFA_HAVE_PREADV2])]) 229 AC_CHECK_FUNC([pwritev2], [AC_DEFINE([CFA_HAVE_PWRITEV2])]) 225 230 226 231 AC_CONFIG_FILES([ … … 229 234 prelude/Makefile 230 235 ]) 236 AC_CONFIG_FILES([src/concurrency/io/call.cfa], [python3 ${srcdir}/src/concurrency/io/call.cfa.in > src/concurrency/io/call.cfa]) 231 237 232 238 AC_CONFIG_HEADERS(prelude/defines.hfa) -
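The configure.ac change switches from AC_CHECK_FUNCS to individual AC_CHECK_FUNC calls so that detecting preadv2/pwritev2 defines the CFA_HAVE_PREADV2/CFA_HAVE_PWRITEV2 macros used by the threading library. Consuming code can then guard on those macros; a rough plain-C sketch (function name hypothetical):

    #define _GNU_SOURCE
    #include <sys/uio.h>

    /* Use preadv2 when configure detected it, otherwise fall back to preadv. */
    static ssize_t read_iov_at(int fd, const struct iovec * iov, int iovcnt,
                               off_t offset, int flags) {
    #if defined(CFA_HAVE_PREADV2)
        return preadv2(fd, iov, iovcnt, offset, flags);
    #else
        (void)flags;
        return preadv(fd, iov, iovcnt, offset);
    #endif
    }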
libcfa/prelude/defines.hfa.in
r1c507eb r7a80113 117 117 118 118 /* Defined if io_uring support is present when compiling libcfathread and 119 supports the operation IORING_OP_TEE. */ 120 #undef CFA_HAVE_IORING_OP_TEE 121 122 /* Defined if io_uring support is present when compiling libcfathread and 119 123 supports the operation IORING_OP_TIMEOUT. */ 120 124 #undef CFA_HAVE_IORING_OP_TIMEOUT … … 163 167 #undef CFA_HAVE_LINUX_IO_URING_H 164 168 169 /* Defined if openat2 support is present when compiling libcfathread. */ 170 #undef CFA_HAVE_OPENAT2 171 172 /* Defined if preadv2 support is present when compiling libcfathread. */ 173 #undef CFA_HAVE_PREADV2 174 175 /* Defined if pwritev2 support is present when compiling libcfathread. */ 176 #undef CFA_HAVE_PWRITEV2 177 165 178 /* Defined if io_uring support is present when compiling libcfathread and 166 179 supports the flag SPLICE_F_FD_IN_FIXED. */ 167 180 #undef CFA_HAVE_SPLICE_F_FD_IN_FIXED 168 181 182 /* Defined if statx support is present when compiling libcfathread. */ 183 #undef CFA_HAVE_STATX 184 169 185 /* Location of include files. */ 170 186 #undef CFA_INCDIR … … 188 204 #undef HAVE_MEMORY_H 189 205 190 /* Define to 1 if you have the `preadv2' function. */191 #undef HAVE_PREADV2192 193 /* Define to 1 if you have the `pwritev2' function. */194 #undef HAVE_PWRITEV2195 196 206 /* Define to 1 if you have the <stdint.h> header file. */ 197 207 #undef HAVE_STDINT_H -
libcfa/src/Makefile.am
r1c507eb r7a80113 62 62 iterator.hfa \ 63 63 limits.hfa \ 64 memory.hfa \ 64 65 parseargs.hfa \ 65 66 rational.hfa \ … … 107 108 concurrency/io/setup.cfa \ 108 109 concurrency/io/types.hfa \ 109 concurrency/iocall.cfa \ 110 concurrency/io/call.cfa \ 110 111 concurrency/iofwd.hfa \ 111 112 concurrency/kernel_private.hfa \ -
libcfa/src/bits/locks.hfa
r1c507eb r7a80113 357 357 struct oneshot * expected = this.ptr; 358 358 // was this abandoned? 359 if( expected == 3p ) { free( &this ); return false; } 359 #if defined(__GNUC__) && __GNUC__ >= 7 360 #pragma GCC diagnostic push 361 #pragma GCC diagnostic ignored "-Wfree-nonheap-object" 362 #endif 363 if( expected == 3p ) { free( &this ); return false; } 364 #if defined(__GNUC__) && __GNUC__ >= 7 365 #pragma GCC diagnostic pop 366 #endif 360 367 361 368 /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen -
libcfa/src/concurrency/coroutine.cfa
r1c507eb r7a80113 47 47 48 48 //----------------------------------------------------------------------------- 49 FORALL_DATA_INSTANCE(CoroutineCancelled, 50 (dtype coroutine_t | sized(coroutine_t)), (coroutine_t)) 51 52 struct __cfaehm_node { 53 struct _Unwind_Exception unwind_exception; 54 struct __cfaehm_node * next; 55 int handler_index; 56 }; 57 58 forall(dtype T) 59 void mark_exception(CoroutineCancelled(T) *) {} 60 61 forall(dtype T | sized(T)) 62 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) { 63 dst->the_coroutine = src->the_coroutine; 64 dst->the_exception = src->the_exception; 65 } 66 67 forall(dtype T) 68 const char * msg(CoroutineCancelled(T) *) { 69 return "CoroutineCancelled(...)"; 70 } 71 72 // This code should not be inlined. It is the error path on resume. 73 forall(dtype T | is_coroutine(T)) 74 void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ) { 75 verify( desc->cancellation ); 76 desc->state = Cancelled; 77 exception_t * except = (exception_t *)(1 + (__cfaehm_node *)desc->cancellation); 78 79 CoroutineCancelled(T) except; 80 except.the_coroutine = &cor; 81 except.the_exception = except; 82 throwResume except; 83 84 except->virtual_table->free( except ); 85 free( desc->cancellation ); 86 desc->cancellation = 0p; 87 } 88 89 //----------------------------------------------------------------------------- 49 90 // Global state variables 50 91 … … 180 221 this->storage->limit = storage; 181 222 this->storage->base = (void*)((intptr_t)storage + size); 223 this->storage->exception_context.top_resume = 0p; 224 this->storage->exception_context.current_exception = 0p; 182 225 __attribute__((may_alias)) intptr_t * istorage = (intptr_t*)&this->storage; 183 226 *istorage |= userStack ? 0x1 : 0x0; -
libcfa/src/concurrency/coroutine.hfa
r1c507eb r7a80113 18 18 #include <assert.h> 19 19 #include "invoke.h" 20 #include "../exception.hfa" 21 22 //----------------------------------------------------------------------------- 23 // Exception thrown from resume when a coroutine stack is cancelled. 24 // Should not have to be be sized (see trac #196). 25 FORALL_DATA_EXCEPTION(CoroutineCancelled, 26 (dtype coroutine_t | sized(coroutine_t)), (coroutine_t)) ( 27 coroutine_t * the_coroutine; 28 exception_t * the_exception; 29 ); 30 31 forall(dtype T) 32 void mark_exception(CoroutineCancelled(T) *); 33 34 forall(dtype T | sized(T)) 35 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src); 36 37 forall(dtype T) 38 const char * msg(CoroutineCancelled(T) *); 20 39 21 40 //----------------------------------------------------------------------------- … … 23 42 // Anything that implements this trait can be resumed. 24 43 // Anything that is resumed is a coroutine. 25 trait is_coroutine(dtype T) { 26 void main(T & this); 27 $coroutine * get_coroutine(T & this); 44 trait is_coroutine(dtype T | sized(T) 45 | is_resumption_exception(CoroutineCancelled(T)) 46 | VTABLE_ASSERTION(CoroutineCancelled, (T))) { 47 void main(T & this); 48 $coroutine * get_coroutine(T & this); 28 49 }; 29 50 … … 112 133 } 113 134 } 135 136 forall(dtype T | is_coroutine(T)) 137 void __cfaehm_cancelled_coroutine( T & cor, $coroutine * desc ); 114 138 115 139 // Resume implementation inlined for performance … … 145 169 // always done for performance testing 146 170 $ctx_switch( src, dst ); 171 if ( unlikely(dst->cancellation) ) { 172 __cfaehm_cancelled_coroutine( cor, dst ); 173 } 147 174 148 175 return cor; -
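
With this change, resuming a coroutine whose stack was cancelled raises a resumption exception of type CoroutineCancelled(T) at the resume() call instead of silently losing the failure. A sketch of the resumer-side handling; the internal_failure exception, the TRIVIAL_EXCEPTION macro, and the catchResume clause are assumed from libcfa's exception support and are illustrative only:

    #include <coroutine.hfa>
    #include <exception.hfa>

    TRIVIAL_EXCEPTION(internal_failure);            // hypothetical exception used to cancel the stack

    coroutine WillCancel {};

    void main( WillCancel & this ) {
        internal_failure exc;
        cancel_stack( exc );                        // unwind and cancel this coroutine's stack
    }

    void caller() {
        WillCancel wc;
        try {
            resume( wc );                           // cancellation now surfaces here, on the resumer's stack
        } catchResume( CoroutineCancelled(WillCancel) * e ) {
            // e->the_coroutine is &wc, e->the_exception is the original internal_failure
        }
    }

The coroutine keyword generates the required CoroutineCancelled(WillCancel) vtable instance, which is what the new is_coroutine trait assertions demand.
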
libcfa/src/concurrency/exception.cfa
r1c507eb r7a80113 57 57 58 58 STOP_AT_END_FUNCTION(coroutine_cancelstop, 59 // TODO: Instead pass information to the last resumer. 59 struct $coroutine * src = ($coroutine *)stop_param; 60 struct $coroutine * dst = src->last; 61 62 $ctx_switch( src, dst ); 60 63 abort(); 61 64 ) -
libcfa/src/concurrency/exception.hfa
r1c507eb r7a80113 18 18 #include "bits/defs.hfa" 19 19 #include "invoke.h" 20 struct _Unwind_Exception;21 22 // It must also be usable as a C header file.23 20 24 21 #ifdef __cforall 25 22 extern "C" { 23 24 #define HIDE_EXPORTS 26 25 #endif 26 #include "unwind.h" 27 27 28 28 struct exception_context_t * this_exception_context(void) OPTIONAL_THREAD; … … 32 32 33 33 #ifdef __cforall 34 #undef HIDE_EXPORTS 34 35 } 35 36 #endif -
libcfa/src/concurrency/invoke.h
r1c507eb r7a80113 68 68 }; 69 69 70 enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active };70 enum __Coroutine_State { Halted, Start, Primed, Blocked, Ready, Active, Cancelled }; 71 71 72 72 struct $coroutine { -
libcfa/src/concurrency/io.cfa
r1c507eb r7a80113 159 159 160 160 static inline void process(struct io_uring_cqe & cqe ) { 161 struct __io_user_data_t * data = (struct __io_user_data_t *)(uintptr_t)cqe.user_data; 162 __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", data, cqe.res, data->thrd ); 163 164 data->result = cqe.res; 165 post( data->sem ); 161 struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data; 162 __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", future, cqe.res, data->thrd ); 163 164 fulfil( *future, cqe.res ); 166 165 } 167 166 -
libcfa/src/concurrency/io/types.hfa
r1c507eb r7a80113 16 16 #pragma once 17 17 18 extern "C" { 19 #include <linux/types.h> 20 } 21 22 #include "bits/locks.hfa" 23 18 24 #if defined(CFA_HAVE_LINUX_IO_URING_H) 19 extern "C" {20 #include <linux/types.h>21 }22 23 #include "bits/locks.hfa"24 25 25 #define LEADER_LOCK 26 26 struct __leaderlock_t { … … 101 101 }; 102 102 103 104 //-----------------------------------------------------------------------105 // IO user data106 struct __io_user_data_t {107 __s32 result;108 oneshot sem;109 };110 111 103 //----------------------------------------------------------------------- 112 104 // Misc … … 143 135 void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev); 144 136 #endif 137 138 //----------------------------------------------------------------------- 139 // IO user data 140 struct io_future_t { 141 future_t self; 142 __s32 result; 143 }; 144 145 static inline { 146 bool fulfil( io_future_t & this, __s32 result ) { 147 this.result = result; 148 return fulfil(this.self); 149 } 150 151 // Wait for the future to be fulfilled 152 bool wait( io_future_t & this ) { 153 return wait(this.self); 154 } 155 } -
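
io_future_t replaces the old pairing of a result slot with a oneshot: the completion side calls fulfil with the cqe result, the submitting side blocks in wait and then reads result. The two halves in isolation, as a sketch; in practice fulfil is called by the I/O completion path shown in io.cfa above, not by the waiting thread itself:

    void handshake() {
        io_future_t f;
        fulfil( f, 42 );            // completion side: record the result, wake any waiter
        wait( f );                  // submit side: already fulfilled here, so returns immediately
        __s32 res = f.result;       // 42
    }
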
libcfa/src/concurrency/iofwd.hfa
r1c507eb r7a80113 40 40 41 41 struct cluster; 42 struct io_future_t; 42 43 struct io_context; 43 44 struct io_cancellation; … … 48 49 struct statx; 49 50 50 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 51 extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 52 extern int cfa_fsync(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 53 extern int cfa_sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 54 extern ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 55 extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 56 extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 57 extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 58 extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 59 extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 60 extern int cfa_fallocate(int fd, int mode, uint64_t offset, uint64_t len, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 61 extern int cfa_fadvise(int fd, uint64_t offset, uint64_t len, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 62 extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 63 extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 64 extern int cfa_close(int fd, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 65 extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 66 extern ssize_t cfa_read(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 67 extern ssize_t cfa_write(int fd, void *buf, size_t count, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 68 extern 
ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 69 extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags = 0, Duration timeout = -1`s, io_cancellation * cancellation = 0p, io_context * context = 0p); 51 //---------- 52 // synchronous calls 53 #if defined(CFA_HAVE_PREADV2) 54 extern ssize_t cfa_preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 55 #endif 56 #if defined(CFA_HAVE_PWRITEV2) 57 extern ssize_t cfa_pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 58 #endif 59 extern int cfa_fsync(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 60 extern int cfa_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 61 extern int cfa_sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 62 extern ssize_t cfa_sendmsg(int sockfd, const struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 63 extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 64 extern ssize_t cfa_send(int sockfd, const void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 65 extern ssize_t cfa_recv(int sockfd, void *buf, size_t len, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 66 extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 67 extern int cfa_connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 68 extern int cfa_fallocate(int fd, int mode, off_t offset, off_t len, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 69 extern int cfa_posix_fadvise(int fd, off_t offset, off_t len, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 70 extern int cfa_madvise(void *addr, size_t length, int advice, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 71 extern int cfa_openat(int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 72 #if defined(CFA_HAVE_OPENAT2) 73 extern int cfa_openat2(int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 74 #endif 75 extern int cfa_close(int fd, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 76 #if defined(CFA_HAVE_STATX) 77 extern int cfa_statx(int dirfd, const char *pathname, int flags, unsigned int 
mask, struct statx *statxbuf, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 78 #endif 79 extern ssize_t cfa_read(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 80 extern ssize_t cfa_write(int fd, void * buf, size_t count, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 81 extern ssize_t cfa_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 82 extern ssize_t cfa_tee(int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, Duration timeout, io_cancellation * cancellation, io_context * context); 83 84 //---------- 85 // asynchronous calls 86 #if defined(CFA_HAVE_PREADV2) 87 extern void async_preadv2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 88 #endif 89 #if defined(CFA_HAVE_PWRITEV2) 90 extern void async_pwritev2(io_future_t & future, int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 91 #endif 92 extern void async_fsync(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context); 93 extern void async_epoll_ctl(io_future_t & future, int epfd, int op, int fd, struct epoll_event *event, int submit_flags, io_cancellation * cancellation, io_context * context); 94 extern void async_sync_file_range(io_future_t & future, int fd, off64_t offset, off64_t nbytes, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 95 extern void async_sendmsg(io_future_t & future, int sockfd, const struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 96 extern void async_recvmsg(io_future_t & future, int sockfd, struct msghdr *msg, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 97 extern void async_send(io_future_t & future, int sockfd, const void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 98 extern void async_recv(io_future_t & future, int sockfd, void *buf, size_t len, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 99 extern void async_accept4(io_future_t & future, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 100 extern void async_connect(io_future_t & future, int sockfd, const struct sockaddr *addr, socklen_t addrlen, int submit_flags, io_cancellation * cancellation, io_context * context); 101 extern void async_fallocate(io_future_t & future, int fd, int mode, off_t offset, off_t len, int submit_flags, io_cancellation * cancellation, io_context * context); 102 extern void async_posix_fadvise(io_future_t & future, int fd, off_t offset, off_t len, int advice, int submit_flags, io_cancellation * cancellation, io_context * context); 103 extern void async_madvise(io_future_t & future, void *addr, size_t length, int advice, int submit_flags, io_cancellation * cancellation, io_context * context); 104 extern void async_openat(io_future_t & future, int dirfd, const char *pathname, int flags, mode_t mode, int submit_flags, io_cancellation * 
cancellation, io_context * context); 105 #if defined(CFA_HAVE_OPENAT2) 106 extern void async_openat2(io_future_t & future, int dirfd, const char *pathname, struct open_how * how, size_t size, int submit_flags, io_cancellation * cancellation, io_context * context); 107 #endif 108 extern void async_close(io_future_t & future, int fd, int submit_flags, io_cancellation * cancellation, io_context * context); 109 #if defined(CFA_HAVE_STATX) 110 extern void async_statx(io_future_t & future, int dirfd, const char *pathname, int flags, unsigned int mask, struct statx *statxbuf, int submit_flags, io_cancellation * cancellation, io_context * context); 111 #endif 112 void async_read(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context); 113 extern void async_write(io_future_t & future, int fd, void * buf, size_t count, int submit_flags, io_cancellation * cancellation, io_context * context); 114 extern void async_splice(io_future_t & future, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 115 extern void async_tee(io_future_t & future, int fd_in, int fd_out, size_t len, unsigned int flags, int submit_flags, io_cancellation * cancellation, io_context * context); 116 70 117 71 118 //----------------------------------------------------------------------------- -
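
The new async_* entry points split submission from completion: each takes an io_future_t instead of blocking, so the calling thread can overlap other work with the kernel. A sketch of the intended usage, assuming the full io_future_t definition from concurrency/io/types.hfa is visible to the caller; parameter values are illustrative:

    void read_async( int fd, void * buf, size_t count ) {
        io_future_t f;
        // submit the read: 0 submit flags, no cancellation handle, default io_context
        async_read( f, fd, buf, count, 0, 0p, 0p );

        // ... the calling thread is free to do other work here ...

        wait( f );                      // park until the completion queue entry arrives
        ssize_t done = f.result;        // bytes read, or a negative errno value
    }
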
libcfa/src/concurrency/kernel.hfa
r1c507eb r7a80113 23 23 24 24 extern "C" { 25 #include <bits/pthreadtypes.h> 25 #include <bits/pthreadtypes.h> 26 #include <linux/types.h> 26 27 } 27 28 … … 157 158 158 159 struct io_cancellation { 159 uint32_ttarget;160 __u64 target; 160 161 }; 161 162 -
libcfa/src/concurrency/monitor.cfa
r1c507eb r7a80113 89 89 __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner); 90 90 91 if( !this->owner ) { 91 if( unlikely(0 != (0x1 & (uintptr_t)this->owner)) ) { 92 abort( "Attempt by thread \"%.256s\" (%p) to access joined monitor %p.", thrd->self_cor.name, thrd, this ); 93 } 94 else if( !this->owner ) { 92 95 // No one has the monitor, just take it 93 96 __set_owner( this, thrd ); … … 137 140 } 138 141 139 static void __dtor_enter( $monitor * this, fptr_t func ) {142 static void __dtor_enter( $monitor * this, fptr_t func, bool join ) { 140 143 // Lock the monitor spinlock 141 144 lock( this->lock __cfaabi_dbg_ctx2 ); … … 157 160 return; 158 161 } 159 else if( this->owner == thrd ) {162 else if( this->owner == thrd && !join) { 160 163 // We already have the monitor... but where about to destroy it so the nesting will fail 161 164 // Abort! 162 165 abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd ); 166 } 167 // SKULLDUGGERY: join will act as a dtor so it would normally trigger to above check 168 // to avoid that it sets the owner to the special value thrd | 1p before exiting 169 else if( this->owner == ($thread*)(1 | (uintptr_t)thrd) ) { 170 // restore the owner and just return 171 __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this); 172 173 // No one has the monitor, just take it 174 this->owner = thrd; 175 176 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this ); 177 178 unlock( this->lock ); 179 return; 163 180 } 164 181 … … 251 268 252 269 // Leave single monitor for the last time 253 void __dtor_leave( $monitor * this ) {270 void __dtor_leave( $monitor * this, bool join ) { 254 271 __cfaabi_dbg_debug_do( 255 272 if( TL_GET( this_thread ) != this->owner ) { 256 273 abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner); 257 274 } 258 if( this->recursion != 1 ) {275 if( this->recursion != 1 && !join ) { 259 276 abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1); 260 277 } 261 278 ) 279 280 this->owner = ($thread*)(1 | (uintptr_t)this->owner); 262 281 } 263 282 … … 307 326 } 308 327 328 // Join a thread 329 forall( dtype T | is_thread(T) ) 330 T & join( T & this ) { 331 $monitor * m = get_monitor(this); 332 void (*dtor)(T& mutex this) = ^?{}; 333 monitor_dtor_guard_t __guard = { &m, (fptr_t)dtor, true }; 334 { 335 return this; 336 } 337 } 338 309 339 // Enter multiple monitor 310 340 // relies on the monitor array being sorted … … 366 396 // Ctor for monitor guard 367 397 // Sorts monitors before entering 368 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {398 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func, bool join ) { 369 399 // optimization 370 400 $thread * thrd = TL_GET( this_thread ); … … 376 406 this.prev = thrd->monitors; 377 407 408 // Save whether we are in a join or not 409 this.join = join; 410 378 411 // Update thread context (needed for conditions) 379 412 (thrd->monitors){m, 1, func}; 380 413 381 __dtor_enter( this.m, func );414 __dtor_enter( this.m, func, join ); 382 415 } 383 416 … … 385 418 void ^?{}( monitor_dtor_guard_t & this ) { 386 419 // Leave the monitors in order 387 __dtor_leave( this.m );420 __dtor_leave( this.m, this.join ); 388 421 389 422 // Restore thread context -
libcfa/src/concurrency/monitor.hfa
r1c507eb r7a80113 53 53 $monitor * m; 54 54 __monitor_group_t prev; 55 bool join; 55 56 }; 56 57 57 void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );58 void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)(), bool join ); 58 59 void ^?{}( monitor_dtor_guard_t & this ); 59 60 -
libcfa/src/concurrency/thread.hfa
r1c507eb r7a80113 106 106 void sleep( Duration duration ); 107 107 108 //---------- 109 // join 110 forall( dtype T | is_thread(T) ) 111 T & join( T & this ); 112 108 113 // Local Variables: // 109 114 // mode: c // -
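
join builds on the monitor destructor machinery changed above: it blocks until the thread's main returns but leaves the object alive and usable, so results can be read before the destructor runs. A minimal sketch of the intended use; the Adder thread type is illustrative:

    #include <thread.hfa>

    thread Adder { int sum; };

    void main( Adder & this ) {
        this.sum = 0;
        for ( i; 10 ) this.sum += i;        // runs concurrently with its creator
    }

    int main() {
        Adder a;                            // thread starts once construction completes
        join( a );                          // new: wait for main(a) to return
        int total = a.sum;                  // safe: the thread has finished
        return total == 45 ? 0 : 1;
    }                                       // destructor of a still runs here as usual
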
libcfa/src/exception.h
r1c507eb r7a80113 76 76 // implemented in the .c file either so they all have to be inline. 77 77 78 trait is_exception(dtype T) {78 trait is_exception(dtype exceptT) { 79 79 /* The first field must be a pointer to a virtual table. 80 80 * That virtual table must be a decendent of the base exception virtual tab$ 81 81 */ 82 void mark_exception( T *);82 void mark_exception(exceptT *); 83 83 // This is never used and should be a no-op. 84 84 }; 85 85 86 trait is_termination_exception(dtype T | is_exception(T)) {87 void defaultTerminationHandler( T &);86 trait is_termination_exception(dtype exceptT | is_exception(exceptT)) { 87 void defaultTerminationHandler(exceptT &); 88 88 }; 89 89 90 trait is_resumption_exception(dtype T | is_exception(T)) {91 void defaultResumptionHandler( T &);90 trait is_resumption_exception(dtype exceptT | is_exception(exceptT)) { 91 void defaultResumptionHandler(exceptT &); 92 92 }; 93 93 94 forall(dtype T | is_termination_exception(T))95 static inline void $throw( T & except) {94 forall(dtype exceptT | is_termination_exception(exceptT)) 95 static inline void $throw(exceptT & except) { 96 96 __cfaehm_throw_terminate( 97 97 (exception_t *)&except, … … 100 100 } 101 101 102 forall(dtype T | is_resumption_exception(T))103 static inline void $throwResume( T & except) {102 forall(dtype exceptT | is_resumption_exception(exceptT)) 103 static inline void $throwResume(exceptT & except) { 104 104 __cfaehm_throw_resume( 105 105 (exception_t *)&except, … … 108 108 } 109 109 110 forall(dtype T | is_exception(T))111 static inline void cancel_stack( T & except) __attribute__((noreturn)) {110 forall(dtype exceptT | is_exception(exceptT)) 111 static inline void cancel_stack(exceptT & except) __attribute__((noreturn)) { 112 112 __cfaehm_cancel_stack( (exception_t *)&except ); 113 113 } 114 114 115 forall(dtype T | is_exception(T))116 static inline void defaultTerminationHandler( T & except) {115 forall(dtype exceptT | is_exception(exceptT)) 116 static inline void defaultTerminationHandler(exceptT & except) { 117 117 return cancel_stack( except ); 118 118 } 119 119 120 forall(dtype T | is_exception(T))121 static inline void defaultResumptionHandler( T & except) {120 forall(dtype exceptT | is_exception(exceptT)) 121 static inline void defaultResumptionHandler(exceptT & except) { 122 122 throw except; 123 123 } -
libcfa/src/exception.hfa
r1c507eb r7a80113 192 192 size_t size; \ 193 193 void (*copy)(exception_name * this, exception_name * other); \ 194 void (* free)(exception_name & this); \ 194 void (*^?{})(exception_name & this); \ 195 195 const char * (*msg)(exception_name * this); \ 196 196 _CLOSE … … 213 213 size_t size; \ 214 214 void (*copy)(exception_name parameters * this, exception_name parameters * other); \ 215 void (* free)(exception_name parameters & this); \ 215 void (*^?{})(exception_name parameters & this); \ 216 216 const char * (*msg)(exception_name parameters * this); \ 217 217 _CLOSE -
libcfa/src/heap.cfa
r1c507eb r7a80113 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Sep 3 16:22:54202013 // Update Count : 9 4312 // Last Modified On : Mon Sep 7 22:17:46 2020 13 // Update Count : 957 14 14 // 15 15 … … 889 889 size_t bsize, oalign; 890 890 headers( "resize", oaddr, header, freeElem, bsize, oalign ); 891 892 891 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 892 893 893 // same size, DO NOT preserve STICKY PROPERTIES. 894 if ( oalign <= libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size894 if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size 895 895 header->kind.real.blockSize &= -2; // no alignment and turn off 0 fill 896 896 header->kind.real.size = size; // reset allocation size … … 931 931 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 932 932 size_t osize = header->kind.real.size; // old allocation size 933 bool ozfill = (header->kind.real.blockSize & 2) != 0; // old allocation zero filled934 if ( unlikely( size <= odsize ) && size > odsize /2 ) { // allow up to 50% wasted storage933 bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled 934 if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage 935 935 header->kind.real.size = size; // reset allocation size 936 936 if ( unlikely( ozfill ) && size > osize ) { // previous request zero fill and larger ? … … 947 947 948 948 void * naddr; 949 if ( likely( oalign <= libAlign() ) ) { // previous request not aligned ?949 if ( likely( oalign == libAlign() ) ) { // previous request not aligned ? 950 950 naddr = mallocNoStats( size ); // create new area 951 951 } else { … … 1231 1231 } // if 1232 1232 1233 // Attempt to reuse existing storage.1233 // Attempt to reuse existing alignment. 1234 1234 HeapManager.Storage.Header * header = headerAddr( oaddr ); 1235 if ( unlikely ( ( header->kind.fake.alignment & 1 == 1 && // old fake header ? 1236 (uintptr_t)oaddr % nalign == 0 && // lucky match ? 1237 header->kind.fake.alignment <= nalign && // ok to leave LSB at 1 1238 nalign <= 128 ) // not too much alignment storage wasted ? 1239 || ( header->kind.fake.alignment & 1 != 1 && // old real header ( aligned on libAlign ) ? 1240 nalign == libAlign() ) ) ) { // new alignment also on libAlign 1241 1242 HeapManager.FreeHeader * freeElem; 1243 size_t bsize, oalign; 1244 headers( "resize", oaddr, header, freeElem, bsize, oalign ); 1245 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 1246 1247 if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage 1235 bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ? 1236 size_t oalign; 1237 if ( isFakeHeader ) { 1238 oalign = header->kind.fake.alignment & -2; // old alignment 1239 if ( (uintptr_t)oaddr % nalign == 0 // lucky match ? 1240 && ( oalign <= nalign // going down 1241 || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ? 
1242 ) { 1248 1243 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same) 1249 1250 header->kind.real.blockSize &= -2; // turn off 0 fill 1251 header->kind.real.size = size; // reset allocation size 1252 return oaddr; 1253 } // if 1244 HeapManager.FreeHeader * freeElem; 1245 size_t bsize, oalign; 1246 headers( "resize", oaddr, header, freeElem, bsize, oalign ); 1247 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 1248 1249 if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage 1250 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same) 1251 1252 header->kind.real.blockSize &= -2; // turn off 0 fill 1253 header->kind.real.size = size; // reset allocation size 1254 return oaddr; 1255 } // if 1256 } // if 1257 } else if ( ! isFakeHeader // old real header (aligned on libAlign) ? 1258 && nalign == libAlign() ) { // new alignment also on libAlign => no fake header needed 1259 return resize( oaddr, size ); // duplicate special case checks 1254 1260 } // if 1255 1261 … … 1281 1287 } // if 1282 1288 1283 HeapManager.Storage.Header * header; 1284 HeapManager.FreeHeader * freeElem; 1285 size_t bsize, oalign; 1286 headers( "realloc", oaddr, header, freeElem, bsize, oalign ); 1287 1288 // Attempt to reuse existing storage. 1289 if ( unlikely ( ( header->kind.fake.alignment & 1 == 1 && // old fake header ? 1290 (uintptr_t)oaddr % nalign == 0 && // lucky match ? 1291 header->kind.fake.alignment <= nalign && // ok to leave LSB at 1 1292 nalign <= 128 ) // not too much alignment storage wasted ? 1293 || ( header->kind.fake.alignment & 1 != 1 && // old real header ( aligned on libAlign ) ? 1294 nalign == libAlign() ) ) ) { // new alignment also on libAlign 1295 1296 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same) 1297 return realloc( oaddr, size ); 1298 1299 } // if 1300 1301 // change size and copy old content to new storage 1289 // Attempt to reuse existing alignment. 1290 HeapManager.Storage.Header * header = headerAddr( oaddr ); 1291 bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ? 1292 size_t oalign; 1293 if ( isFakeHeader ) { 1294 oalign = header->kind.fake.alignment & -2; // old alignment 1295 if ( (uintptr_t)oaddr % nalign == 0 // lucky match ? 1296 && ( oalign <= nalign // going down 1297 || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ? 1298 ) { 1299 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same) 1300 return realloc( oaddr, size ); // duplicate alignment and special case checks 1301 } // if 1302 } else if ( ! isFakeHeader // old real header (aligned on libAlign) ? 
1303 && nalign == libAlign() ) // new alignment also on libAlign => no fake header needed 1304 return realloc( oaddr, size ); // duplicate alignment and special case checks 1302 1305 1303 1306 #ifdef __STATISTICS__ … … 1306 1309 #endif // __STATISTICS__ 1307 1310 1311 HeapManager.FreeHeader * freeElem; 1312 size_t bsize; 1313 headers( "realloc", oaddr, header, freeElem, bsize, oalign ); 1314 1315 // change size and copy old content to new storage 1316 1308 1317 size_t osize = header->kind.real.size; // old allocation size 1309 bool ozfill = (header->kind.real.blockSize & 2) != 0;// old allocation zero filled1318 bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled 1310 1319 1311 1320 void * naddr = memalignNoStats( nalign, size ); // create new aligned area -
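
The rewritten paths above try to keep the caller's alignment and reuse the existing bucket whenever the new size stays within twice the old data size; only otherwise is new storage allocated (and, for realloc, the old bytes copied). A sketch of the caller-visible behaviour, assuming CFA's extended heap API with an alignment argument for resize and realloc; the header name is an assumption:

    #include <stdlib.hfa>                   // assumed header for CFA's heap extensions

    void example() {
        void * p = memalign( 64, 100 );     // 64-byte aligned, 100 bytes
        p = resize( p, 64, 150 );           // within 2x of the bucket: storage reused, contents NOT preserved
        p = realloc( p, 64, 4096 );         // larger: new 64-byte-aligned block, old bytes copied
        free( p );
    }
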
src/AST/Convert.cpp
r1c507eb r7a80113 1162 1162 } 1163 1163 1164 const ast::Type * postvisit( const ast:: ReferenceToType * old, ReferenceToType * ty ) {1164 const ast::Type * postvisit( const ast::BaseInstType * old, ReferenceToType * ty ) { 1165 1165 ty->forall = get<TypeDecl>().acceptL( old->forall ); 1166 1166 ty->parameters = get<Expression>().acceptL( old->params ); … … 2521 2521 } 2522 2522 2523 void postvisit( const ReferenceToType * old, ast:: ReferenceToType * ty ) {2523 void postvisit( const ReferenceToType * old, ast::BaseInstType * ty ) { 2524 2524 ty->forall = GET_ACCEPT_V( forall, TypeDecl ); 2525 2525 ty->params = GET_ACCEPT_V( parameters, Expr ); -
src/AST/Fwd.hpp
r1c507eb r7a80113 107 107 class QualifiedType; 108 108 class FunctionType; 109 class ReferenceToType;109 class BaseInstType; 110 110 template<typename decl_t> class SueInstType; 111 111 using StructInstType = SueInstType<StructDecl>; -
src/AST/GenericSubstitution.cpp
r1c507eb r7a80113 42 42 private: 43 43 // make substitution for generic type 44 void makeSub( const ReferenceToType * ty ) {44 void makeSub( const BaseInstType * ty ) { 45 45 visit_children = false; 46 46 const AggregateDecl * aggr = ty->aggr(); -
src/AST/Node.cpp
r1c507eb r7a80113 266 266 template class ast::ptr_base< ast::FunctionType, ast::Node::ref_type::weak >; 267 267 template class ast::ptr_base< ast::FunctionType, ast::Node::ref_type::strong >; 268 template class ast::ptr_base< ast:: ReferenceToType, ast::Node::ref_type::weak >;269 template class ast::ptr_base< ast:: ReferenceToType, ast::Node::ref_type::strong >;268 template class ast::ptr_base< ast::BaseInstType, ast::Node::ref_type::weak >; 269 template class ast::ptr_base< ast::BaseInstType, ast::Node::ref_type::strong >; 270 270 template class ast::ptr_base< ast::StructInstType, ast::Node::ref_type::weak >; 271 271 template class ast::ptr_base< ast::StructInstType, ast::Node::ref_type::strong >; -
src/AST/Pass.hpp
r1c507eb r7a80113 50 50 // | PureVisitor - makes the visitor pure, it never modifies nodes in place and always 51 51 // clones nodes it needs to make changes to 52 // | With TypeSubstitution - provides polymorphic const TypeSubstitution * envfor the52 // | WithConstTypeSubstitution - provides polymorphic const TypeSubstitution * typeSubs for the 53 53 // current expression 54 54 // | WithStmtsToAdd - provides the ability to insert statements before or after the current … … 67 67 // | WithSymbolTable - provides symbol table functionality 68 68 // | WithForallSubstitutor - maintains links between TypeInstType and TypeDecl under mutation 69 // 70 // Other Special Members: 71 // | result - Either a method that takes no parameters or a field. If a method (or 72 // callable field) get_result calls it, otherwise the value is returned. 69 73 //------------------------------------------------------------------------------------------------- 70 74 template< typename core_t > … … 89 93 virtual ~Pass() = default; 90 94 95 /// Storage for the actual pass. 96 core_t core; 97 98 /// If the core defines a result, call it if possible, otherwise return it. 99 inline auto get_result() -> decltype( __pass::get_result( core, '0' ) ) { 100 return __pass::get_result( core, '0' ); 101 } 102 91 103 /// Construct and run a pass on a translation unit. 92 104 template< typename... Args > … … 96 108 } 97 109 110 /// Contruct and run a pass on a pointer to extract a value. 111 template< typename node_type, typename... Args > 112 static auto read( node_type const * node, Args&&... args ) { 113 Pass<core_t> visitor( std::forward<Args>( args )... ); 114 node_type const * temp = node->accept( visitor ); 115 assert( temp == node ); 116 return visitor.get_result(); 117 } 118 119 // Versions of the above for older compilers. 98 120 template< typename... Args > 99 121 static void run( std::list< ptr<Decl> > & decls ) { … … 102 124 } 103 125 104 /// Storage for the actual pass 105 core_t core; 126 template< typename node_type, typename... Args > 127 static auto read( node_type const * node ) { 128 Pass<core_t> visitor; 129 node_type const * temp = node->accept( visitor ); 130 assert( temp == node ); 131 return visitor.get_result(); 132 } 106 133 107 134 /// Visit function declarations … … 267 294 //------------------------------------------------------------------------------------------------- 268 295 269 /// Keep track of the polymorphic const TypeSubstitution * env for the current expression270 271 296 /// If used the visitor will always clone nodes. 272 297 struct PureVisitor {}; 273 298 299 /// Keep track of the polymorphic const TypeSubstitution * typeSubs for the current expression. 274 300 struct WithConstTypeSubstitution { 275 const TypeSubstitution * env= nullptr;301 const TypeSubstitution * typeSubs = nullptr; 276 302 }; 277 303 -
src/AST/Pass.impl.hpp
r1c507eb r7a80113 154 154 __pedantic_pass_assert( expr ); 155 155 156 const ast::TypeSubstitution ** env_ptr = __pass::env( core, 0);157 if ( env_ptr && expr->env ) {158 * env_ptr = expr->env;156 const ast::TypeSubstitution ** typeSubs_ptr = __pass::typeSubs( core, 0 ); 157 if ( typeSubs_ptr && expr->env ) { 158 *typeSubs_ptr = expr->env; 159 159 } 160 160 … … 177 177 178 178 // These may be modified by subnode but most be restored once we exit this statemnet. 179 ValueGuardPtr< const ast::TypeSubstitution * > __old_env ( __pass:: env( core, 0) );179 ValueGuardPtr< const ast::TypeSubstitution * > __old_env ( __pass::typeSubs( core, 0 ) ); 180 180 ValueGuardPtr< typename std::remove_pointer< decltype(stmts_before) >::type > __old_decls_before( stmts_before ); 181 181 ValueGuardPtr< typename std::remove_pointer< decltype(stmts_after ) >::type > __old_decls_after ( stmts_after ); … … 1488 1488 1489 1489 // These may be modified by subnode but most be restored once we exit this statemnet. 1490 ValueGuardPtr< const ast::TypeSubstitution * > __old_env( __pass:: env( core, 0) );1490 ValueGuardPtr< const ast::TypeSubstitution * > __old_env( __pass::typeSubs( core, 0 ) ); 1491 1491 ValueGuardPtr< typename std::remove_pointer< decltype(stmts_before) >::type > __old_decls_before( stmts_before ); 1492 1492 ValueGuardPtr< typename std::remove_pointer< decltype(stmts_after ) >::type > __old_decls_after ( stmts_after ); -
src/AST/Pass.proto.hpp
r1c507eb r7a80113 236 236 237 237 // List of fields and their expected types 238 FIELD_PTR( env, const ast::TypeSubstitution * )238 FIELD_PTR( typeSubs, const ast::TypeSubstitution * ) 239 239 FIELD_PTR( stmtsToAddBefore, std::list< ast::ptr< ast::Stmt > > ) 240 240 FIELD_PTR( stmtsToAddAfter , std::list< ast::ptr< ast::Stmt > > ) … … 421 421 422 422 } // namespace forall 423 424 template<typename core_t> 425 static inline auto get_result( core_t & core, char ) -> decltype( core.result() ) { 426 return core.result(); 427 } 428 429 template<typename core_t> 430 static inline auto get_result( core_t & core, int ) -> decltype( core.result ) { 431 return core.result; 432 } 433 434 template<typename core_t> 435 static inline void get_result( core_t &, long ) {} 423 436 } // namespace __pass 424 437 } // namespace ast -
src/AST/Print.cpp
r1c507eb r7a80113 270 270 } 271 271 272 void preprint( const ast:: ReferenceToType * node ) {272 void preprint( const ast::BaseInstType * node ) { 273 273 print( node->forall ); 274 274 print( node->attributes ); -
src/AST/SymbolTable.cpp
r1c507eb r7a80113 313 313 if ( ! expr->result ) continue; 314 314 const Type * resTy = expr->result->stripReferences(); 315 auto aggrType = dynamic_cast< const ReferenceToType * >( resTy );315 auto aggrType = dynamic_cast< const BaseInstType * >( resTy ); 316 316 assertf( aggrType, "WithStmt expr has non-aggregate type: %s", 317 317 toString( expr->result ).c_str() ); … … 654 654 if ( dwt->name == "" ) { 655 655 const Type * t = dwt->get_type()->stripReferences(); 656 if ( auto rty = dynamic_cast<const ReferenceToType *>( t ) ) {656 if ( auto rty = dynamic_cast<const BaseInstType *>( t ) ) { 657 657 if ( ! dynamic_cast<const StructInstType *>(rty) 658 658 && ! dynamic_cast<const UnionInstType *>(rty) ) continue; -
src/AST/Type.cpp
r1c507eb r7a80113 124 124 } 125 125 126 // --- ReferenceToType127 128 void ReferenceToType::initWithSub( const ReferenceToType & o, Pass< ForallSubstitutor > & sub ) {126 // --- BaseInstType 127 128 void BaseInstType::initWithSub( const BaseInstType & o, Pass< ForallSubstitutor > & sub ) { 129 129 ParameterizedType::initWithSub( o, sub ); // initialize substitution 130 130 params = sub.core( o.params ); // apply to parameters 131 131 } 132 132 133 ReferenceToType::ReferenceToType( const ReferenceToType & o )133 BaseInstType::BaseInstType( const BaseInstType & o ) 134 134 : ParameterizedType( o.qualifiers, copy( o.attributes ) ), params(), name( o.name ), 135 135 hoistType( o.hoistType ) { … … 138 138 } 139 139 140 std::vector<readonly<Decl>> ReferenceToType::lookup( const std::string& name ) const {140 std::vector<readonly<Decl>> BaseInstType::lookup( const std::string& name ) const { 141 141 assertf( aggr(), "Must have aggregate to perform lookup" ); 142 142 … … 153 153 SueInstType<decl_t>::SueInstType( 154 154 const decl_t * b, CV::Qualifiers q, std::vector<ptr<Attribute>>&& as ) 155 : ReferenceToType( b->name, q, move(as) ), base( b ) {}155 : BaseInstType( b->name, q, move(as) ), base( b ) {} 156 156 157 157 template<typename decl_t> … … 168 168 TraitInstType::TraitInstType( 169 169 const TraitDecl * b, CV::Qualifiers q, std::vector<ptr<Attribute>>&& as ) 170 : ReferenceToType( b->name, q, move(as) ), base( b ) {}170 : BaseInstType( b->name, q, move(as) ), base( b ) {} 171 171 172 172 // --- TypeInstType 173 173 174 174 TypeInstType::TypeInstType( const TypeInstType & o ) 175 : ReferenceToType( o.name, o.qualifiers, copy( o.attributes ) ), base(), kind( o.kind ) {175 : BaseInstType( o.name, o.qualifiers, copy( o.attributes ) ), base(), kind( o.kind ) { 176 176 Pass< ForallSubstitutor > sub; 177 177 initWithSub( o, sub ); // initialize substitution -
src/AST/Type.hpp
r1c507eb r7a80113 329 329 330 330 /// base class for types that refer to types declared elsewhere (aggregates and typedefs) 331 class ReferenceToType : public ParameterizedType {331 class BaseInstType : public ParameterizedType { 332 332 protected: 333 333 /// Initializes forall and parameters based on substitutor 334 void initWithSub( const ReferenceToType & o, Pass< ForallSubstitutor > & sub );334 void initWithSub( const BaseInstType & o, Pass< ForallSubstitutor > & sub ); 335 335 public: 336 336 std::vector<ptr<Expr>> params; … … 338 338 bool hoistType = false; 339 339 340 ReferenceToType(340 BaseInstType( 341 341 const std::string& n, CV::Qualifiers q = {}, std::vector<ptr<Attribute>> && as = {} ) 342 342 : ParameterizedType(q, std::move(as)), params(), name(n) {} 343 343 344 ReferenceToType( const ReferenceToType & o );344 BaseInstType( const BaseInstType & o ); 345 345 346 346 /// Gets aggregate declaration this type refers to … … 350 350 351 351 private: 352 virtual ReferenceToType * clone() const override = 0;352 virtual BaseInstType * clone() const override = 0; 353 353 MUTATE_FRIEND 354 354 }; … … 356 356 // Common implementation for the SUE instance types. Not to be used directly. 357 357 template<typename decl_t> 358 class SueInstType final : public ReferenceToType {358 class SueInstType final : public BaseInstType { 359 359 public: 360 360 using base_type = decl_t; … … 363 363 SueInstType( 364 364 const std::string& n, CV::Qualifiers q = {}, std::vector<ptr<Attribute>> && as = {} ) 365 : ReferenceToType( n, q, std::move(as) ), base() {}365 : BaseInstType( n, q, std::move(as) ), base() {} 366 366 367 367 SueInstType( … … 388 388 389 389 /// An instance of a trait type. 390 class TraitInstType final : public ReferenceToType {390 class TraitInstType final : public BaseInstType { 391 391 public: 392 392 readonly<TraitDecl> base; … … 394 394 TraitInstType( 395 395 const std::string& n, CV::Qualifiers q = {}, std::vector<ptr<Attribute>> && as = {} ) 396 : ReferenceToType( n, q, std::move(as) ), base() {}396 : BaseInstType( n, q, std::move(as) ), base() {} 397 397 398 398 TraitInstType( … … 411 411 412 412 /// instance of named type alias (typedef or variable) 413 class TypeInstType final : public ReferenceToType {413 class TypeInstType final : public BaseInstType { 414 414 public: 415 415 readonly<TypeDecl> base; … … 419 419 const std::string& n, const TypeDecl * b, CV::Qualifiers q = {}, 420 420 std::vector<ptr<Attribute>> && as = {} ) 421 : ReferenceToType( n, q, std::move(as) ), base( b ), kind( b->kind ) {}421 : BaseInstType( n, q, std::move(as) ), base( b ), kind( b->kind ) {} 422 422 TypeInstType( const std::string& n, TypeDecl::Kind k, CV::Qualifiers q = {}, 423 423 std::vector<ptr<Attribute>> && as = {} ) 424 : ReferenceToType( n, q, std::move(as) ), base(), kind( k ) {}424 : BaseInstType( n, q, std::move(as) ), base(), kind( k ) {} 425 425 426 426 TypeInstType( const TypeInstType & o ); -
src/AST/TypeSubstitution.cpp
r1c507eb r7a80113 176 176 } 177 177 178 void TypeSubstitution::Substituter::handleAggregateType( const ReferenceToType * type ) {178 void TypeSubstitution::Substituter::handleAggregateType( const BaseInstType * type ) { 179 179 GuardValue( boundVars ); 180 180 // bind type variables from forall-qualifiers -
src/AST/TypeSubstitution.hpp
r1c507eb r7a80113 169 169 void previsit( const ParameterizedType * type ); 170 170 /// Records type variable bindings from forall-statements and instantiations of generic types 171 void handleAggregateType( const ReferenceToType * type );171 void handleAggregateType( const BaseInstType * type ); 172 172 173 173 void previsit( const StructInstType * aggregateUseType ); -
src/Common/Stats/Stats.cc
r1c507eb r7a80113 35 35 } 36 36 37 namespace ResolveTime { 38 bool enabled = false; 39 } 40 37 41 struct { 38 42 const char * const opt; … … 43 47 { "heap" , Heap::enabled }, 44 48 { "time" , Time::enabled }, 49 { "resolve" , ResolveTime::enabled }, 45 50 }; 46 51 -
src/Common/module.mk
r1c507eb r7a80113 22 22 Common/ErrorObjects.h \ 23 23 Common/Eval.cc \ 24 Common/Examine.cc \ 25 Common/Examine.h \ 24 26 Common/FilterCombos.h \ 25 27 Common/Indenter.h \ … … 38 40 Common/Stats/Heap.cc \ 39 41 Common/Stats/Heap.h \ 42 Common/Stats/ResolveTime.cc \ 43 Common/Stats/ResolveTime.h \ 40 44 Common/Stats/Stats.cc \ 41 45 Common/Stats/Time.cc \ -
src/Concurrency/Keywords.cc
r1c507eb r7a80113 19 19 #include <string> // for string, operator== 20 20 21 #include <iostream> 22 23 #include "Common/Examine.h" // for isMainFor 21 24 #include "Common/PassVisitor.h" // for PassVisitor 22 25 #include "Common/SemanticError.h" // for SemanticError … … 34 37 #include "SynTree/Type.h" // for StructInstType, Type, PointerType 35 38 #include "SynTree/Visitor.h" // for Visitor, acceptAll 39 #include "Virtual/Tables.h" 36 40 37 41 class Attribute; 38 42 39 43 namespace Concurrency { 44 inline static std::string getVTableName( std::string const & exception_name ) { 45 return exception_name.empty() ? std::string() : Virtual::vtableTypeName(exception_name); 46 } 47 40 48 //============================================================================================= 41 49 // Pass declarations … … 54 62 public: 55 63 56 ConcurrentSueKeyword( std::string&& type_name, std::string&& field_name, std::string&& getter_name, std::string&& context_error, bool needs_main, AggregateDecl::Aggregate cast_target ) : 57 type_name( type_name ), field_name( field_name ), getter_name( getter_name ), context_error( context_error ), needs_main( needs_main ), cast_target( cast_target ) {} 64 ConcurrentSueKeyword( std::string&& type_name, std::string&& field_name, 65 std::string&& getter_name, std::string&& context_error, std::string&& exception_name, 66 bool needs_main, AggregateDecl::Aggregate cast_target ) : 67 type_name( type_name ), field_name( field_name ), getter_name( getter_name ), 68 context_error( context_error ), vtable_name( getVTableName( exception_name ) ), 69 needs_main( needs_main ), cast_target( cast_target ) {} 58 70 59 71 virtual ~ConcurrentSueKeyword() {} … … 63 75 64 76 void handle( StructDecl * ); 77 void addVtableForward( StructDecl * ); 65 78 FunctionDecl * forwardDeclare( StructDecl * ); 66 79 ObjectDecl * addField( StructDecl * ); … … 76 89 const std::string getter_name; 77 90 const std::string context_error; 91 const std::string vtable_name; 78 92 bool needs_main; 79 93 AggregateDecl::Aggregate cast_target; … … 81 95 StructDecl * type_decl = nullptr; 82 96 FunctionDecl * dtor_decl = nullptr; 97 StructDecl * vtable_decl = nullptr; 83 98 }; 84 99 … … 101 116 "get_thread", 102 117 "thread keyword requires threads to be in scope, add #include <thread.hfa>\n", 118 "", 103 119 true, 104 120 AggregateDecl::Thread … … 133 149 "get_coroutine", 134 150 "coroutine keyword requires coroutines to be in scope, add #include <coroutine.hfa>\n", 151 "CoroutineCancelled", 135 152 true, 136 153 AggregateDecl::Coroutine … … 167 184 "get_monitor", 168 185 "monitor keyword requires monitors to be in scope, add #include <monitor.hfa>\n", 186 "", 169 187 false, 170 188 AggregateDecl::Monitor … … 198 216 "get_generator", 199 217 "Unable to find builtin type $generator\n", 218 "", 200 219 true, 201 220 AggregateDecl::Generator … … 231 250 232 251 private: 233 DeclarationWithType * is_main( FunctionDecl * );234 252 bool is_real_suspend( FunctionDecl * ); 235 253 … … 359 377 handle( decl ); 360 378 } 379 else if ( !vtable_decl && vtable_name == decl->name && decl->body ) { 380 vtable_decl = decl; 381 } 382 // Might be able to get ride of is target. 
383 assert( is_target(decl) == (cast_target == decl->kind) ); 361 384 return decl; 362 385 } 363 386 364 387 DeclarationWithType * ConcurrentSueKeyword::postmutate( FunctionDecl * decl ) { 365 if( !type_decl ) return decl; 366 if( !CodeGen::isDestructor( decl->name ) ) return decl; 367 368 auto params = decl->type->parameters; 369 if( params.size() != 1 ) return decl; 370 371 auto type = dynamic_cast<ReferenceType*>( params.front()->get_type() ); 372 if( !type ) return decl; 373 374 auto stype = dynamic_cast<StructInstType*>( type->base ); 375 if( !stype ) return decl; 376 if( stype->baseStruct != type_decl ) return decl; 377 378 if( !dtor_decl ) dtor_decl = decl; 388 if ( type_decl && isDestructorFor( decl, type_decl ) ) 389 dtor_decl = decl; 390 else if ( vtable_name.empty() ) 391 ; 392 else if ( auto param = isMainFor( decl, cast_target ) ) { 393 // This should never trigger. 394 assert( vtable_decl ); 395 // Should be safe because of isMainFor. 396 StructInstType * struct_type = static_cast<StructInstType *>( 397 static_cast<ReferenceType *>( param->get_type() )->base ); 398 assert( struct_type ); 399 400 declsToAddAfter.push_back( Virtual::makeVtableInstance( vtable_decl, { 401 new TypeExpr( struct_type->clone() ), 402 }, struct_type, nullptr ) ); 403 } 404 379 405 return decl; 380 406 } … … 400 426 if( !dtor_decl ) SemanticError( decl, context_error ); 401 427 428 addVtableForward( decl ); 402 429 FunctionDecl * func = forwardDeclare( decl ); 403 430 ObjectDecl * field = addField( decl ); 404 431 addRoutines( field, func ); 432 } 433 434 void ConcurrentSueKeyword::addVtableForward( StructDecl * decl ) { 435 if ( vtable_decl ) { 436 declsToAddBefore.push_back( Virtual::makeVtableForward( vtable_decl, { 437 new TypeExpr( new StructInstType( noQualifiers, decl ) ), 438 } ) ); 439 // Its only an error if we want a vtable and don't have one. 440 } else if ( ! vtable_name.empty() ) { 441 SemanticError( decl, context_error ); 442 } 405 443 } 406 444 … … 528 566 // Suspend keyword implementation 529 567 //============================================================================================= 530 DeclarationWithType * SuspendKeyword::is_main( FunctionDecl * func) {531 if(func->name != "main") return nullptr;532 if(func->type->parameters.size() != 1) return nullptr;533 534 auto param = func->type->parameters.front();535 536 auto type = dynamic_cast<ReferenceType * >(param->get_type());537 if(!type) return nullptr;538 539 auto obj = dynamic_cast<StructInstType *>(type->base);540 if(!obj) return nullptr;541 542 if(!obj->baseStruct->is_generator()) return nullptr;543 544 return param;545 }546 547 568 bool SuspendKeyword::is_real_suspend( FunctionDecl * func ) { 548 569 if(isMangled(func->linkage)) return false; // the real suspend isn't mangled … … 565 586 566 587 // Is this the main of a generator? 567 auto param = is _main( func);588 auto param = isMainFor( func, AggregateDecl::Aggregate::Generator ); 568 589 if(!param) return; 569 590 … … 910 931 { 911 932 new SingleInit( new AddressExpr( new VariableExpr( monitors ) ) ), 912 new SingleInit( new CastExpr( new VariableExpr( func ), generic_func->clone(), false ) ) 933 new SingleInit( new CastExpr( new VariableExpr( func ), generic_func->clone(), false ) ), 934 new SingleInit( new ConstantExpr( Constant::from_bool( false ) ) ) 913 935 }, 914 936 noDesignators, … … 1033 1055 // tab-width: 4 // 1034 1056 // End: // 1057 -
src/ResolvExpr/CandidateFinder.cpp
r1c507eb r7a80113 816 816 /// Adds aggregate member interpretations 817 817 void addAggMembers( 818 const ast:: ReferenceToType * aggrInst, const ast::Expr * expr,818 const ast::BaseInstType * aggrInst, const ast::Expr * expr, 819 819 const Candidate & cand, const Cost & addedCost, const std::string & name 820 820 ) { … … 1263 1263 1264 1264 void postvisit( const ast::UntypedOffsetofExpr * offsetofExpr ) { 1265 const ast:: ReferenceToType * aggInst;1265 const ast::BaseInstType * aggInst; 1266 1266 if (( aggInst = offsetofExpr->type.as< ast::StructInstType >() )) ; 1267 1267 else if (( aggInst = offsetofExpr->type.as< ast::UnionInstType >() )) ; -
src/ResolvExpr/ConversionCost.cc
r1c507eb r7a80113 520 520 return convertToReferenceCost( src, refType, srcIsLvalue, symtab, env, localPtrsAssignable ); 521 521 } else { 522 ast::Pass<ConversionCost_new> converter( dst, srcIsLvalue, symtab, env, localConversionCost ); 523 src->accept( converter ); 524 return converter.core.cost; 522 return ast::Pass<ConversionCost_new>::read( src, dst, srcIsLvalue, symtab, env, localConversionCost ); 525 523 } 526 524 } … … 563 561 } 564 562 } else { 565 ast::Pass<ConversionCost_new> converter( dst, srcIsLvalue, symtab, env, localConversionCost ); 566 src->accept( converter ); 567 return converter.core.cost; 563 return ast::Pass<ConversionCost_new>::read( src, dst, srcIsLvalue, symtab, env, localConversionCost ); 568 564 } 569 565 } else { -
src/ResolvExpr/ConversionCost.h
r1c507eb r7a80113 88 88 static size_t traceId; 89 89 Cost cost; 90 Cost result() { return cost; } 90 91 91 92 ConversionCost_new( const ast::Type * dst, bool srcIsLvalue, const ast::SymbolTable & symtab, -
src/ResolvExpr/CurrentObject.cc
r1c507eb r7a80113 923 923 924 924 MemberIterator * createMemberIterator( const CodeLocation & loc, const Type * type ) { 925 if ( auto aggr = dynamic_cast< const ReferenceToType * >( type ) ) {925 if ( auto aggr = dynamic_cast< const BaseInstType * >( type ) ) { 926 926 if ( auto sit = dynamic_cast< const StructInstType * >( aggr ) ) { 927 927 return new StructIterator{ loc, sit }; … … 932 932 dynamic_cast< const EnumInstType * >( type ) 933 933 || dynamic_cast< const TypeInstType * >( type ), 934 "Encountered unhandled ReferenceToType in createMemberIterator: %s",934 "Encountered unhandled BaseInstType in createMemberIterator: %s", 935 935 toString( type ).c_str() ); 936 936 return new SimpleIterator{ loc, type }; … … 965 965 DesignatorChain & d = *dit; 966 966 PRINT( std::cerr << "____actual: " << t << std::endl; ) 967 if ( auto refType = dynamic_cast< const ReferenceToType * >( t ) ) {967 if ( auto refType = dynamic_cast< const BaseInstType * >( t ) ) { 968 968 // concatenate identical field names 969 969 for ( const Decl * mem : refType->lookup( nexpr->name ) ) { -
src/ResolvExpr/Resolver.cc
r1c507eb r7a80113 38 38	#include "Common/PassVisitor.h" // for PassVisitor
39 39	#include "Common/SemanticError.h" // for SemanticError
 40	#include "Common/Stats/ResolveTime.h" // for ResolveTime::start(), ResolveTime::stop()
40 41	#include "Common/utility.h" // for ValueGuard, group_iterate
41 42	#include "InitTweak/GenInit.h"
… …
965 966	/// Finds deleted expressions in an expression tree
966 967	struct DeleteFinder_new final : public ast::WithShortCircuiting {
967 	const ast::DeletedExpr * delExpr= nullptr;
 968	const ast::DeletedExpr * result = nullptr;
968 969	
969 970	void previsit( const ast::DeletedExpr * expr ) {
970 	if ( delExpr) { visit_children = false; }
 971	if ( result ) { visit_children = false; }
971 	else { delExpr= expr; }
 972	else { result = expr; }
972 973	}
973 974	
974 975	void previsit( const ast::Expr * ) {
975 	if ( delExpr) { visit_children = false; }
 976	if ( result ) { visit_children = false; }
976 977	}
977 978	};
… …
980 981	/// Check if this expression is or includes a deleted expression
981 982	const ast::DeletedExpr * findDeletedExpr( const ast::Expr * expr ) {
982 	ast::Pass<DeleteFinder_new> finder;
983 	expr->accept( finder );
984 	return finder.core.delExpr;
 983	return ast::Pass<DeleteFinder_new>::read( expr );
985 984	}
986 985	
… …
1171 1170	const ast::Expr * untyped, const ast::SymbolTable & symtab
1172 1171	) {
1173 	return findKindExpression( untyped, symtab );
 1172	Stats::ResolveTime::start( untyped );
 1173	auto res = findKindExpression( untyped, symtab );
 1174	Stats::ResolveTime::stop();
 1175	return res;
1174 1176	}
1175 1177	} // anonymous namespace
… …
1261 1263	const ast::ThrowStmt * previsit( const ast::ThrowStmt * );
1262 1264	const ast::CatchStmt * previsit( const ast::CatchStmt * );
 1265	const ast::CatchStmt * postvisit( const ast::CatchStmt * );
1263 1266	const ast::WaitForStmt * previsit( const ast::WaitForStmt * );
1264 1267	
… …
1493 1496	
1494 1497	const ast::CatchStmt * Resolver_new::previsit( const ast::CatchStmt * catchStmt ) {
1495 	// TODO: This will need a fix for the decl/cond scoping problem.
 1498	// Until we are very sure this invarent (ifs that move between passes have thenPart)
 1499	// holds, check it. This allows a check for when to decode the mangling.
 1500	if ( auto ifStmt = catchStmt->body.as<ast::IfStmt>() ) {
 1501	assert( ifStmt->thenPart );
 1502	}
 1503	// Encode the catchStmt so the condition can see the declaration.
1496 1504	if ( catchStmt->cond ) {
1497 	ast::ptr< ast::Type > boolType = new ast::BasicType{ ast::BasicType::Bool };
1498 	catchStmt = ast::mutate_field(
1499 	catchStmt, &ast::CatchStmt::cond,
1500 	findSingleExpression( catchStmt->cond, boolType, symtab ) );
 1505	ast::CatchStmt * stmt = mutate( catchStmt );
 1506	stmt->body = new ast::IfStmt( stmt->location, stmt->cond, nullptr, stmt->body );
 1507	stmt->cond = nullptr;
 1508	return stmt;
 1509	}
 1510	return catchStmt;
 1511	}
 1512	
 1513	const ast::CatchStmt * Resolver_new::postvisit( const ast::CatchStmt * catchStmt ) {
 1514	// Decode the catchStmt so everything is stored properly.
 1515	const ast::IfStmt * ifStmt = catchStmt->body.as<ast::IfStmt>();
 1516	if ( nullptr != ifStmt && nullptr == ifStmt->thenPart ) {
 1517	assert( ifStmt->cond );
 1518	assert( ifStmt->elsePart );
 1519	ast::CatchStmt * stmt = ast::mutate( catchStmt );
 1520	stmt->cond = ifStmt->cond;
 1521	stmt->body = ifStmt->elsePart;
 1522	// ifStmt should be implicately deleted here.
 1523	return stmt;
1501 1524	}
1502 1525	return catchStmt;
-
src/SymTab/Mangler.cc
r1c507eb r7a80113 437 437	private:
438 438	void mangleDecl( const ast::DeclWithType *declaration );
439 	void mangleRef( const ast:: ReferenceToType *refType, std::string prefix );
 439	void mangleRef( const ast::BaseInstType *refType, std::string prefix );
440 440	
441 441	void printQualifiers( const ast::Type *type );
… …
560 560	}
561 561	
562 	void Mangler_new::mangleRef( const ast:: ReferenceToType * refType, std::string prefix ) {
 562	void Mangler_new::mangleRef( const ast::BaseInstType * refType, std::string prefix ) {
563 563	printQualifiers( refType );
564 564	
-
src/SymTab/Validate.cc
r1c507eb r7a80113 960 960	}
961 961	
 962	static bool isNonParameterAttribute( Attribute * attr ) {
 963	static const std::vector<std::string> bad_names = {
 964	"aligned", "__aligned__",
 965	};
 966	for ( auto name : bad_names ) {
 967	if ( name == attr->name ) {
 968	return true;
 969	}
 970	}
 971	return false;
 972	}
 973	
962 974	Type * ReplaceTypedef::postmutate( TypeInstType * typeInst ) {
963 975	// instances of typedef types will come here. If it is an instance
… …
968 980	ret->location = typeInst->location;
969 981	ret->get_qualifiers() |= typeInst->get_qualifiers();
970 	// attributes are not carried over from typedef to function parameters/return values
971 	if ( ! inFunctionType ) {
972 	ret->attributes.splice( ret->attributes.end(), typeInst->attributes );
973 	} else {
974 	deleteAll( ret->attributes );
975 	ret->attributes.clear();
976 	}
 982	// GCC ignores certain attributes if they arrive by typedef, this mimics that.
 983	if ( inFunctionType ) {
 984	ret->attributes.remove_if( isNonParameterAttribute );
 985	}
 986	ret->attributes.splice( ret->attributes.end(), typeInst->attributes );
977 987	// place instance parameters on the typedef'd type
978 988	if ( ! typeInst->parameters.empty() ) {
… …
1508 1518	}
1509 1519	
1510 	void checkGenericParameters( const ast:: ReferenceToType * inst ) {
 1520	void checkGenericParameters( const ast::BaseInstType * inst ) {
1511 1521	for ( const ast::Expr * param : inst->params ) {
1512 1522	if ( ! dynamic_cast< const ast::TypeExpr * >( param ) ) {
-
src/Virtual/module.mk
r1c507eb r7a80113 15 15	###############################################################################
16 16	
17 	SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h
 17	SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h \
 18	Virtual/Tables.cc Virtual/Tables.h
 19	
 20	SRCDEMANGLE += Virtual/Tables.cc
-
tests/Makefile.am
r1c507eb r7a80113 38 38	# since automake doesn't have support for CFA we have to
39 39	AM_CFLAGS = $(if $(test), 2> $(test), ) \
 40	-fdebug-prefix-map=$(abspath ${abs_srcdir})= \
 41	-fdebug-prefix-map=/tmp= \
40 42	-g \
41 43	-Wall \
… …
58 60	# adjusted CC but without the actual distcc call
59 61	CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
 62	CFACCLINK = $(CFACCLOCAL) $(if $(test), 2> $(test), ) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g'))
60 63	
61 64	PRETTY_PATH=mkdir -p $(dir $(abspath ${@})) && cd ${srcdir} &&
… …
110 113	% : %.cfa $(CFACCBIN)
111 114	$(CFACOMPILETEST) -c -o $(abspath ${@}).o
112 	$(CFACCLOCAL) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g')) $(abspath ${@}).o -o $(abspath ${@})
 115	$(CFACCLINK) ${@}.o -o $(abspath ${@})
 116	rm $(abspath ${@}).o
113 117	
114 118	# implicit rule for c++ test
… …
137 141	# CUSTOM TARGET
138 142	#------------------------------------------------------------------------------
 143	# tests that just validate syntax
 144	expression : expression.cfa $(CFACCBIN)
 145	$(CFACOMPILETEST) -c -fsyntax-only 2> $(abspath ${@})
 146	
139 147	# expected failures
140 148	# use custom target since they require a custom define and custom dependencies
… …
170 178	$(CFACCLOCAL) $($(shell echo "${@}_FLAGSLD" | sed 's/-\|\//_/g')) $(abspath ${@}).o -o $(abspath ${@})
171 179	
 180	# Linking tests
 181	# Meta tests to make sure we see linking errors (can't compile with -O2 since it may multiply number of calls)
 182	linking/linkerror : linking/linkerror.cfa $(CFACCBIN)
 183	$(CFACOMPILETEST) -O0 -c -o $(abspath ${@}).o
 184	$(CFACCLINK) -O0 ${@}.o -o $(abspath ${@})
 185	rm $(abspath ${@}).o
 186	
172 187	#------------------------------------------------------------------------------
173 188	# Other targets
-
tests/alloc2.cfa
r1c507eb r7a80113 13 13	void test_base( void * ip, size_t size, size_t align) {
14 14	tests_total += 1;
 15	// printf("DEBUG: starting test %d\n", tests_total);
15 16	bool passed = (malloc_size(ip) == size) && (malloc_usable_size(ip) >= size) && (malloc_alignment(ip) == align) && ((uintptr_t)ip % align == 0);
16 17	if (!passed) {
… …
18 19	tests_failed += 1;
19 20	}
 21	// printf("DEBUG: done test %d\n", tests_total);
20 22	}
21 23	
22 24	void test_fill( void * ip_, size_t start, size_t end, char fill) {
23 25	tests_total += 1;
 26	// printf("DEBUG: starting test %d\n", tests_total);
24 27	bool passed = true;
25 28	char * ip = (char *) ip_;
… …
29 32	tests_failed += 1;
30 33	}
 34	// printf("DEBUG: done test %d\n", tests_total);
31 35	}
32 36	
33 37	void test_fill( void * ip_, size_t start, size_t end, int fill) {
34 38	tests_total += 1;
 39	// printf("DEBUG: starting test %d\n", tests_total);
35 40	bool passed = true;
36 41	int * ip = (int *) ip_;
… …
40 45	tests_failed += 1;
41 46	}
 47	// printf("DEBUG: done test %d\n", tests_total);
42 48	}
43 49	
44 50	void test_fill( void * ip_, size_t start, size_t end, int * fill) {
45 51	tests_total += 1;
 52	// printf("DEBUG: starting test %d\n", tests_total);
46 53	bool passed = (memcmp((void*)((uintptr_t)ip_ + start), (void*)fill, end) == 0);
47 54	if (!passed) {
… …
49 56	tests_failed += 1;
50 57	}
 58	// printf("DEBUG: done test %d\n", tests_total);
51 59	}
52 60	
53 61	void test_fill( void * ip_, size_t start, size_t end, T1 fill) {
54 62	tests_total += 1;
 63	// printf("DEBUG: starting test %d\n", tests_total);
55 64	bool passed = true;
56 65	T1 * ip = (T1 *) ip_;
… …
60 69	tests_failed += 1;
61 70	}
 71	// printf("DEBUG: done test %d\n", tests_total);
62 72	}
63 73	
64 74	void test_fill( void * ip_, size_t start, size_t end, T1 * fill) {
65 75	tests_total += 1;
 76	// printf("DEBUG: starting test %d\n", tests_total);
66 77	bool passed = (memcmp((void*)((uintptr_t)ip_ + start), (void*)fill, end) == 0);
67 78	if (!passed) {
… …
69 80	tests_failed += 1;
70 81	}
 82	// printf("DEBUG: done test %d\n", tests_total);
71 83	}
72 84	
73 85	void test_use( int * ip, size_t dim) {
74 86	tests_total += 1;
 87	// printf("DEBUG: starting test %d\n", tests_total);
75 88	bool passed = true;
76 89	for (i; 0 ~ dim) ip[i] = 0xdeadbeef;
… …
80 93	tests_failed += 1;
81 94	}
 95	// printf("DEBUG: done test %d\n", tests_total);
82 96	}
83 97	
84 98	void test_use( T1 * ip, size_t dim) {
85 99	tests_total += 1;
 100	// printf("DEBUG: starting test %d\n", tests_total);
86 101	bool passed = true;
87 102	for (i; 0 ~ dim) ip[i].data = 0xdeadbeef;
… …
91 106	tests_failed += 1;
92 107	}
 108	// printf("DEBUG: done test %d\n", tests_total);
93 109	}
94 110	
-
tests/heap.cfa
r1c507eb r7a80113 10 10	// Created On : Tue Nov 6 17:54:56 2018
11 11	// Last Modified By : Peter A. Buhr
12 	// Last Modified On : Sun Aug 9 08:05:16 2020
13 	// Update Count : 57
 12	// Last Modified On : Mon Sep 7 18:37:41 2020
 13	// Update Count : 72
14 14	//
15 15	
… …
205 205	free( area );
206 206	} // for
 207	} // for
 208	
 209	// check malloc/resize/free (sbrk)
 210	
 211	for ( i; 2 ~ NoOfAllocs ~ 12 ) {
 212	// initial N byte allocation
 213	char * area = (char *)malloc( i );
 214	area[0] = '\345'; area[i - 1] = '\345'; // fill first/penultimate byte
 215	
 216	// Do not start this loop index at 0 because resize of 0 bytes frees the storage.
 217	int prev = i;
 218	for ( s; i ~ 256 * 1024 ~ 26 ) { // start at initial memory request
 219	if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/resize/free corrupt storage" );
 220	area = (char *)resize( area, s ); // attempt to reuse storage
 221	area[0] = area[s - 1] = '\345'; // fill last byte
 222	prev = s;
 223	} // for
 224	free( area );
 225	} // for
 226	
 227	// check malloc/resize/free (mmap)
 228	
 229	for ( i; 2 ~ NoOfAllocs ~ 12 ) {
 230	// initial N byte allocation
 231	size_t s = i + default_mmap_start(); // cross over point
 232	char * area = (char *)malloc( s );
 233	area[0] = '\345'; area[s - 1] = '\345'; // fill first/penultimate byte
 234	
 235	// Do not start this loop index at 0 because resize of 0 bytes frees the storage.
 236	int prev = s;
 237	for ( r; s ~ 256 * 1024 ~ 26 ) { // start at initial memory request
 238	if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/resize/free corrupt storage" );
 239	area = (char *)resize( area, s ); // attempt to reuse storage
 240	area[0] = area[r - 1] = '\345'; // fill last byte
 241	prev = r;
 242	} // for
 243	free( area );
 244	} // for
 245	
 246	// check malloc/realloc/free (sbrk)
 247	
 248	for ( i; 2 ~ NoOfAllocs ~ 12 ) {
 249	// initial N byte allocation
 250	char * area = (char *)malloc( i );
 251	area[0] = '\345'; area[i - 1] = '\345'; // fill first/penultimate byte
 252	
 253	// Do not start this loop index at 0 because realloc of 0 bytes frees the storage.
 254	int prev = i;
 255	for ( s; i ~ 256 * 1024 ~ 26 ) { // start at initial memory request
 256	if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/realloc/free corrupt storage" );
 257	area = (char *)realloc( area, s ); // attempt to reuse storage
 258	area[s - 1] = '\345'; // fill last byte
 259	prev = s;
 260	} // for
 261	free( area );
 262	} // for
 263	
 264	// check malloc/realloc/free (mmap)
 265	
 266	for ( i; 2 ~ NoOfAllocs ~ 12 ) {
 267	// initial N byte allocation
 268	size_t s = i + default_mmap_start(); // cross over point
 269	char * area = (char *)malloc( s );
 270	area[0] = '\345'; area[s - 1] = '\345'; // fill first/penultimate byte
 271	
 272	// Do not start this loop index at 0 because realloc of 0 bytes frees the storage.
 273	int prev = s;
 274	for ( r; s ~ 256 * 1024 ~ 26 ) { // start at initial memory request
 275	if ( area[0] != '\345' || area[prev - 1] != '\345' ) abort( "malloc/realloc/free corrupt storage" );
 276	area = (char *)realloc( area, s ); // attempt to reuse storage
 277	area[r - 1] = '\345'; // fill last byte
 278	prev = r;
 279	} // for
 280	free( area );
207 281	} // for
208 282	
… …
320 394	} // for
321 395	
 396	// check memalign/resize with align/free
 397	
 398	amount = 2;
 399	for ( a; libAlign() ~= limit ~ a ) { // generate powers of 2
 400	// initial N byte allocation
 401	char * area = (char *)memalign( a, amount ); // aligned N-byte allocation
 402	//sout | alignments[a] | area | endl;
 403	if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 404	abort( "memalign/resize with align/free bad alignment : memalign(%d,%d) = %p", (int)a, (int)amount, area );
 405	} // if
 406	area[0] = '\345'; area[amount - 2] = '\345'; // fill first/penultimate byte
 407	
 408	// Do not start this loop index at 0 because resize of 0 bytes frees the storage.
 409	for ( s; amount ~ 256 * 1024 ) { // start at initial memory request
 410	area = (char *)resize( area, a * 2, s ); // attempt to reuse storage
 411	//sout | i | area | endl;
 412	if ( (size_t)area % a * 2 != 0 ) { // check for initial alignment
 413	abort( "memalign/resize with align/free bad alignment %p", area );
 414	} // if
 415	area[s - 1] = '\345'; // fill last byte
 416	} // for
 417	free( area );
 418	} // for
 419	
320 420	// check memalign/realloc with align/free
-
tests/pybin/tools.py
r1c507eb r7a80113 120 120	return None
121 121	
122 	file = open(file, mode )
 122	file = open(file, mode, encoding="latin-1") # use latin-1 so all chars mean something.
123 123	exitstack.push(file)
124 124	return file
-
tests/test.py
r1c507eb r7a80113 207 207	else:
208 208	if os.stat(out_file).st_size < 1048576:
209 	with open (out_file, "r" ) as myfile:
 209	with open (out_file, "r", encoding='latin-1') as myfile: # use latin-1 so all chars mean something.
210 210	error = myfile.read()
211 211	else: