Changes in / [342af53:4468a70]
- Files:
-
- 2 added
- 39 deleted
- 45 edited
-
Jenkins/FullBuild (modified) (1 diff)
-
benchmark/creation/node_cor.js (modified) (1 diff)
-
benchmark/ctxswitch/node_cor.js (modified) (1 diff)
-
benchmark/io/http/Makefile.am (modified) (1 diff)
-
benchmark/io/http/main.cfa (modified) (7 diffs)
-
benchmark/io/http/options.cfa (modified) (2 diffs)
-
benchmark/io/http/options.hfa (modified) (1 diff)
-
benchmark/io/http/protocol.cfa (modified) (8 diffs)
-
benchmark/io/http/protocol.hfa (modified) (2 diffs)
-
benchmark/io/http/worker.cfa (modified) (6 diffs)
-
benchmark/io/http/worker.hfa (modified) (1 diff)
-
benchmark/io/readv.cfa (modified) (1 diff)
-
benchmark/vector/glm_vec2.cc (deleted)
-
benchmark/vector/vec2_notemplates.cfa (deleted)
-
benchmark/vector/vec2_templates.cfa (deleted)
-
doc/bibliography/pl.bib (modified) (6 diffs)
-
doc/theses/andrew_beach_MMath/existing.tex (modified) (3 diffs)
-
doc/theses/andrew_beach_MMath/implement.tex (deleted)
-
doc/theses/fangren_yu_COOP_F20/Makefile (deleted)
-
doc/theses/fangren_yu_COOP_F20/Report.tex (deleted)
-
doc/theses/fangren_yu_COOP_S20/Report.tex (modified) (2 diffs)
-
doc/theses/fangren_yu_COOP_S20/cfa_developer_reference.pdf (added)
-
doc/theses/fangren_yu_COOP_S20/figures/DeepNodeSharing.fig.bak (added)
-
doc/theses/thierry_delisle_PhD/thesis/Makefile (modified) (3 diffs)
-
doc/theses/thierry_delisle_PhD/thesis/text/core.tex (modified) (2 diffs)
-
doc/theses/thierry_delisle_PhD/thesis/text/io.tex (modified) (2 diffs)
-
doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex (modified) (1 diff)
-
doc/theses/thierry_delisle_PhD/thesis/thesis.tex (modified) (6 diffs)
-
example/io/simple/server_epoll.c (modified) (2 diffs)
-
libcfa/src/Makefile.am (modified) (2 diffs)
-
libcfa/src/bits/locks.hfa (modified) (3 diffs)
-
libcfa/src/concurrency/future.hfa (deleted)
-
libcfa/src/concurrency/io.cfa (modified) (22 diffs)
-
libcfa/src/concurrency/io/call.cfa.in (modified) (4 diffs)
-
libcfa/src/concurrency/io/setup.cfa (modified) (13 diffs)
-
libcfa/src/concurrency/io/types.hfa (modified) (4 diffs)
-
libcfa/src/concurrency/monitor.hfa (modified) (1 diff)
-
libcfa/src/heap.cfa (modified) (25 diffs)
-
libcfa/src/startup.cfa (modified) (2 diffs)
-
libcfa/src/vec/vec.hfa (deleted)
-
libcfa/src/vec/vec2.hfa (deleted)
-
libcfa/src/vec/vec3.hfa (deleted)
-
libcfa/src/vec/vec4.hfa (deleted)
-
src/AST/Decl.cpp (modified) (3 diffs)
-
src/AST/Decl.hpp (modified) (2 diffs)
-
src/Parser/DeclarationNode.cc (modified) (2 diffs)
-
src/Parser/ParseNode.h (modified) (2 diffs)
-
src/Parser/parser.yy (modified) (9 diffs)
-
src/ResolvExpr/ResolveAssertions.cc (modified) (1 diff)
-
src/ResolvExpr/SatisfyAssertions.cpp (modified) (17 diffs)
-
src/SymTab/Demangle.cc (modified) (2 diffs)
-
src/SymTab/Mangler.cc (modified) (4 diffs)
-
src/SymTab/ManglerCommon.cc (modified) (2 diffs)
-
src/SynTree/Declaration.h (modified) (2 diffs)
-
src/SynTree/TypeDecl.cc (modified) (3 diffs)
-
tests/Makefile.am (modified) (1 diff)
-
tests/concurrent/futures/.expect/abandon.txt (deleted)
-
tests/concurrent/futures/.expect/basic.txt (modified) (1 diff)
-
tests/concurrent/futures/.expect/multi.txt (deleted)
-
tests/concurrent/futures/.expect/typed.txt (deleted)
-
tests/concurrent/futures/abandon.cfa (deleted)
-
tests/concurrent/futures/basic.cfa (modified) (2 diffs)
-
tests/concurrent/futures/multi.cfa (deleted)
-
tests/concurrent/futures/typed.cfa (deleted)
-
tests/vector_math/.expect/vec2_double.txt (deleted)
-
tests/vector_math/.expect/vec2_float.txt (deleted)
-
tests/vector_math/.expect/vec2_int.txt (deleted)
-
tests/vector_math/.expect/vec2_ldouble.txt (deleted)
-
tests/vector_math/.expect/vec2_uint.txt (deleted)
-
tests/vector_math/.expect/vec3_float.txt (deleted)
-
tests/vector_math/.expect/vec3_int.txt (deleted)
-
tests/vector_math/.expect/vec4_float.txt (deleted)
-
tests/vector_math/.expect/vec4_int.txt (deleted)
-
tests/vector_math/glm_equivalents/vec2_float.cc (deleted)
-
tests/vector_math/glm_equivalents/vec2_int.cc (deleted)
-
tests/vector_math/glm_equivalents/vec3_float.cc (deleted)
-
tests/vector_math/glm_equivalents/vec4_float.cc (deleted)
-
tests/vector_math/vec2_double.cfa (deleted)
-
tests/vector_math/vec2_float.cfa (deleted)
-
tests/vector_math/vec2_int.cfa (deleted)
-
tests/vector_math/vec2_ldouble.cfa (deleted)
-
tests/vector_math/vec2_uint.cfa (deleted)
-
tests/vector_math/vec3_float.cfa (deleted)
-
tests/vector_math/vec3_int.cfa (deleted)
-
tests/vector_math/vec4_float.cfa (deleted)
-
tests/vector_math/vec4_int.cfa (deleted)
Legend:
- Unmodified
- Added
- Removed
-
Jenkins/FullBuild
r342af53 r4468a70 108 108 string(name: 'GitRef', value: commitId), \ 109 109 string(name: 'Build' , value: buildNum) \ 110 ], \ 111 propagate: false 110 ] 112 111 113 112 echo(result.result) -
benchmark/creation/node_cor.js
r342af53 r4468a70 6 6 function * coroutine() { yield } 7 7 8 for ( var i = 0; i < times; i += 1 ) { // warm JIT8 for ( var i = 0; i < times; i += 1 ) { // warm jit 9 9 cor = coroutine() 10 10 } -
benchmark/ctxswitch/node_cor.js
r342af53 r4468a70 11 11 cor = coroutine() 12 12 13 for ( var i = 0; i < times; i += 1 ) { // warm JIT13 for ( var i = 0; i < times; i += 1 ) { // warm git 14 14 cor.next(); 15 15 } -
benchmark/io/http/Makefile.am
r342af53 r4468a70 29 29 EXTRA_PROGRAMS = httpforall .dummy_hack 30 30 31 CLEANFILES = httpforall32 33 31 nodist_httpforall_SOURCES = \ 34 32 filecache.cfa \ -
benchmark/io/http/main.cfa
r342af53 r4468a70 46 46 } 47 47 48 extern void init_protocol(void);49 extern void deinit_protocol(void);50 51 48 //============================================================================================= 52 49 // Main … … 64 61 //=================== 65 62 // Open Socket 66 printf(" %ld : Listening on port %d\n", getpid(), options.socket.port);63 printf("Listening on port %d\n", options.socket.port); 67 64 int server_fd = socket(AF_INET, SOCK_STREAM, 0); 68 65 if(server_fd < 0) { … … 82 79 ret = bind( server_fd, (struct sockaddr *)&address, sizeof(address) ); 83 80 if(ret < 0) { 84 if(errno == EADDRINUSE) {81 if(errno == 98) { 85 82 if(waited == 0) { 86 83 printf("Waiting for port\n"); … … 112 109 options.clopts.instance = &cl; 113 110 114 115 111 int pipe_cnt = options.clopts.nworkers * 2; 116 112 int pipe_off; … … 128 124 { 129 125 ServerProc procs[options.clopts.nprocs]; 130 131 init_protocol();132 126 { 133 127 Worker workers[options.clopts.nworkers]; … … 157 151 printf("Shutting Down\n"); 158 152 } 159 160 for(i; options.clopts.nworkers) {161 printf("Cancelling %p\n", (void*)workers[i].cancel.target);162 workers[i].done = true;163 cancel(workers[i].cancel);164 }165 166 printf("Shutting down socket\n");167 int ret = shutdown( server_fd, SHUT_RD );168 if( ret < 0 ) { abort( "shutdown error: (%d) %s\n", (int)errno, strerror(errno) ); }169 170 //===================171 // Close Socket172 printf("Closing Socket\n");173 ret = close( server_fd );174 if(ret < 0) {175 abort( "close socket error: (%d) %s\n", (int)errno, strerror(errno) );176 }177 153 } 178 154 printf("Workers Closed\n"); 179 180 deinit_protocol();181 155 } 182 156 … … 188 162 } 189 163 free(fds); 164 } 190 165 166 //=================== 167 // Close Socket 168 printf("Closing Socket\n"); 169 ret = close( server_fd ); 170 if(ret < 0) { 171 abort( "close socket error: (%d) %s\n", (int)errno, strerror(errno) ); 191 172 } 192 173 -
benchmark/io/http/options.cfa
r342af53 r4468a70 12 12 #include <parseargs.hfa> 13 13 14 #include <string.h>15 16 14 Options options @= { 17 false, // log18 19 15 { // file_cache 20 16 0, // open_flags; … … 52 48 {'p', "port", "Port the server will listen on", options.socket.port}, 53 49 {'c', "cpus", "Number of processors to use", options.clopts.nprocs}, 54 {'L', "log", "Enable logs", options.log, parse_settrue},55 50 {'t', "threads", "Number of worker threads to use", options.clopts.nworkers}, 56 51 {'b', "accept-backlog", "Maximum number of pending accepts", options.socket.backlog}, -
benchmark/io/http/options.hfa
r342af53 r4468a70 8 8 9 9 struct Options { 10 bool log;11 12 10 struct { 13 11 int open_flags; -
benchmark/io/http/protocol.cfa
r342af53 r4468a70 18 18 #include "options.hfa" 19 19 20 const char * volatile date = 0p;21 22 20 const char * http_msgs[] = { 23 "HTTP/1.1 200 OK\n Server: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu\n\n",24 "HTTP/1.1 400 Bad Request\n Server: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0\n\n",25 "HTTP/1.1 404 Not Found\n Server: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0\n\n",26 "HTTP/1.1 413 Payload Too Large\n Server: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0\n\n",27 "HTTP/1.1 414 URI Too Long\n Server: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0\n\n",21 "HTTP/1.1 200 OK\nContent-Type: text/plain\nContent-Length: %zu\n\n", 22 "HTTP/1.1 400 Bad Request\nContent-Type: text/plain\nContent-Length: 0\n\n", 23 "HTTP/1.1 404 Not Found\nContent-Type: text/plain\nContent-Length: 0\n\n", 24 "HTTP/1.1 413 Payload Too Large\nContent-Type: text/plain\nContent-Length: 0\n\n", 25 "HTTP/1.1 414 URI Too Long\nContent-Type: text/plain\nContent-Length: 0\n\n", 28 26 }; 29 27 … … 47 45 while(len > 0) { 48 46 // Call write 49 int ret = cfa_write(fd, it, len, 0, -1`s, 0p, 0p); 50 // int ret = write(fd, it, len); 47 int ret = write(fd, it, len); 51 48 if( ret < 0 ) { if( errno != EAGAIN && errno != EWOULDBLOCK) abort( "'answer error' error: (%d) %s\n", (int)errno, strerror(errno) ); } 52 49 … … 66 63 int answer_header( int fd, size_t size ) { 67 64 const char * fmt = http_msgs[OK200]; 68 int len = 200;65 int len = 100; 69 66 char buffer[len]; 70 len = snprintf(buffer, len, fmt, date,size);67 len = snprintf(buffer, len, fmt, size); 71 68 return answer( fd, buffer, len ); 72 69 } 73 70 74 int answer_plain( int fd, char buffer[], size_t size ) { 75 int ret = answer_header(fd, size); 76 if( ret < 0 ) return ret; 77 return answer(fd, buffer, size); 78 } 79 80 int answer_empty( int fd ) { 81 return answer_header(fd, 0); 82 } 83 84 85 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation * cancel) { 71 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len) { 86 72 char * it = buffer; 87 73 size_t count = len - 1; … … 89 75 READ: 90 76 for() { 91 int ret = cfa_read(fd, (void*)it, count, 0, -1`s, cancel, 0p); 92 // int ret = read(fd, (void*)it, count); 77 int ret = cfa_read(fd, (void*)it, count, 0, -1`s, 0p, 0p); 93 78 if(ret == 0 ) return [OK200, true, 0, 0]; 94 79 if(ret < 0 ) { 95 80 if( errno == EAGAIN || errno == EWOULDBLOCK) continue READ; 96 // if( errno == EINVAL ) return [E400, true, 0, 0];97 81 abort( "read error: (%d) %s\n", (int)errno, strerror(errno) ); 98 82 } … … 108 92 } 109 93 110 if( options.log )printf("%.*s\n", rlen, buffer);94 printf("%.*s\n", rlen, buffer); 111 95 112 96 it = buffer; … … 120 104 121 105 void sendfile( int pipe[2], int fd, int ans_fd, size_t count ) { 122 unsigned sflags = SPLICE_F_MOVE; // | SPLICE_F_MORE;123 106 off_t offset = 0; 124 107 ssize_t ret; 125 108 SPLICE1: while(count > 0) { 126 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, sflags, 0, -1`s, 0p, 0p); 127 // ret = splice(ans_fd, &offset, pipe[1], 0p, count, sflags); 109 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p); 128 110 if( ret < 0 ) { 129 111 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE1; … … 135 117 size_t in_pipe = ret; 136 118 SPLICE2: while(in_pipe > 0) { 137 ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, sflags, 0, -1`s, 0p, 0p); 138 // ret = splice(pipe[0], 0p, fd, 0p, in_pipe, sflags); 119 ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p); 139 120 if( ret < 0 ) { 140 121 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE2; … … 146 127 } 147 128 } 148 149 //=============================================================================================150 151 #include <clock.hfa>152 #include <time.hfa>153 #include <thread.hfa>154 155 struct date_buffer {156 char buff[100];157 };158 159 thread DateFormater {160 int idx;161 date_buffer buffers[2];162 };163 164 void ?{}( DateFormater & this ) {165 ((thread&)this){ "Server Date Thread", *options.clopts.instance };166 this.idx = 0;167 memset( this.buffers[0].buff, 0, sizeof(this.buffers[0]) );168 memset( this.buffers[1].buff, 0, sizeof(this.buffers[1]) );169 }170 171 void main(DateFormater & this) {172 LOOP: for() {173 waitfor( ^?{} : this) {174 break LOOP;175 }176 or else {}177 178 Time now = getTimeNsec();179 180 strftime( this.buffers[this.idx].buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now );181 182 char * next = this.buffers[this.idx].buff;183 __atomic_exchange_n((char * volatile *)&date, next, __ATOMIC_SEQ_CST);184 this.idx = (this.idx + 1) % 2;185 186 sleep(1`s);187 }188 }189 190 //=============================================================================================191 DateFormater * the_date_formatter;192 193 void init_protocol(void) {194 the_date_formatter = alloc();195 (*the_date_formatter){};196 }197 198 void deinit_protocol(void) {199 ^(*the_date_formatter){};200 free( the_date_formatter );201 } -
benchmark/io/http/protocol.hfa
r342af53 r4468a70 1 1 #pragma once 2 3 struct io_cancellation;4 2 5 3 enum HttpCode { … … 16 14 int answer_error( int fd, HttpCode code ); 17 15 int answer_header( int fd, size_t size ); 18 int answer_plain( int fd, char buffer [], size_t size );19 int answer_empty( int fd );20 16 21 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len , io_cancellation *);17 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len); 22 18 23 19 void sendfile( int pipe[2], int fd, int ans_fd, size_t count ); -
benchmark/io/http/worker.cfa
r342af53 r4468a70 19 19 this.pipe[0] = -1; 20 20 this.pipe[1] = -1; 21 this.done = false;22 }23 24 extern "C" {25 extern int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);26 21 } 27 22 … … 33 28 CONNECTION: 34 29 for() { 35 if( options.log ) printf("=== Accepting connection ===\n"); 36 int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, &this.cancel, 0p ); 37 // int fd = accept4( this.[sockfd, addr, addrlen, flags] ); 30 int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, 0p, 0p ); 38 31 if(fd < 0) { 39 32 if( errno == ECONNABORTED ) break; 40 if( errno == EINVAL && this.done ) break;41 33 abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) ); 42 34 } 43 35 44 if( options.log ) printf("=== New connection %d, waiting for requests ===\n", fd);36 printf("New connection %d, waiting for requests\n", fd); 45 37 REQUEST: 46 38 for() { … … 53 45 size_t len = options.socket.buflen; 54 46 char buffer[len]; 55 if( options.log ) printf("=== Reading request ===\n");56 [code, closed, file, name_size] = http_read(fd, buffer, len , &this.cancel);47 printf("Reading request\n"); 48 [code, closed, file, name_size] = http_read(fd, buffer, len); 57 49 58 50 // if we are done, break out of the loop 59 51 if( closed ) { 60 if( options.log ) printf("=== Connection closed ===\n"); 61 close(fd); 52 printf("Connection closed\n"); 62 53 continue CONNECTION; 63 54 } … … 65 56 // If this wasn't a request retrun 400 66 57 if( code != OK200 ) { 67 printf(" === Invalid Request : %d ===\n", code_val(code));58 printf("Invalid Request : %d\n", code_val(code)); 68 59 answer_error(fd, code); 69 60 continue REQUEST; 70 61 } 71 62 72 if(0 == strncmp(file, "plaintext", min(name_size, sizeof("plaintext") ))) { 73 if( options.log ) printf("=== Request for /plaintext ===\n"); 74 75 char text[] = "Hello, World!\n"; 76 77 // Send the header 78 answer_plain(fd, text, sizeof(text)); 79 80 if( options.log ) printf("=== Answer sent ===\n"); 81 continue REQUEST; 82 } 83 84 if(0 == strncmp(file, "ping", min(name_size, sizeof("ping") ))) { 85 if( options.log ) printf("=== Request for /ping ===\n"); 86 87 // Send the header 88 answer_empty(fd); 89 90 if( options.log ) printf("=== Answer sent ===\n"); 91 continue REQUEST; 92 } 93 94 if( options.log ) printf("=== Request for file %.*s ===\n", (int)name_size, file); 63 printf("Request for file %.*s\n", (int)name_size, file); 95 64 96 65 // Get the fd from the file cache … … 101 70 // If we can't find the file, return 404 102 71 if( ans_fd < 0 ) { 103 printf(" === File Not Found ===\n");72 printf("File Not Found\n"); 104 73 answer_error(fd, E404); 105 74 continue REQUEST; … … 112 81 sendfile( this.pipe, fd, ans_fd, count); 113 82 114 if( options.log ) printf("=== Answer sent ===\n");83 printf("File sent\n"); 115 84 } 116 85 } -
benchmark/io/http/worker.hfa
r342af53 r4468a70 17 17 socklen_t * addrlen; 18 18 int flags; 19 io_cancellation cancel;20 volatile bool done;21 19 }; 22 20 void ?{}( Worker & this); -
benchmark/io/readv.cfa
r342af53 r4468a70 96 96 97 97 char **left; 98 parse_args( opt, opt_cnt, "[OPTIONS]...\ncforall readvbenchmark", left );98 parse_args( opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left ); 99 99 100 100 if(kpollcp || odirect) { 101 101 if( (buflen % 512) != 0 ) { 102 102 fprintf(stderr, "Buffer length must be a multiple of 512 when using O_DIRECT, was %lu\n\n", buflen); 103 print_args_usage(opt, opt_cnt, "[OPTIONS]...\ncforall readvbenchmark", true);103 print_args_usage(opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", true); 104 104 } 105 105 } -
doc/bibliography/pl.bib
r342af53 r4468a70 990 990 } 991 991 992 @techreport{cfa-cc,993 keywords = {Cforall, cfa-cc, transpiler},994 contributer = {pabuhr@plg},995 title = {{\textsf{cfa-cc}} Developer's Reference Manual},996 author = {Fangren Yu},997 institution = {School of Computer Science},998 address = {University of Waterloo, Waterloo, Ontario, Canada},999 month = aug,1000 year = {2020},1001 note = {\href{https://cforall.uwaterloo.ca/doc/Fangren_Yu_Report_S20.pdf}{https://\-cforall.uwaterloo.ca/\-doc/\-Fangren\_Yu\_Report\_S20.pdf}},1002 }1003 1004 992 @article{Moss18, 1005 993 keywords = {type systems, polymorphism, tuples, Cforall}, … … 1052 1040 keywords = {type system, generic type, resolution algorithm, type environment, Cforall}, 1053 1041 author = {Aaron Moss}, 1054 title = {\textsf{C} \,$\mathbf{\forall}$ Type System Implementation},1042 title = {\textsf{C}$\mathbf{\forall}$ Type System Implementation}, 1055 1043 school = {School of Computer Science, University of Waterloo}, 1056 1044 year = 2019, … … 1173 1161 keywords = {ctrie, concurrent map}, 1174 1162 contributer = {a3moss@uwaterloo.ca}, 1175 title = {Cache-aware lock-free concurrent hash tries},1176 author = {Prokopec, Aleksandar and Bagwell, Phil and Odersky, Martin},1177 institution = {EPFL},1178 year = {2011}1163 title ={Cache-aware lock-free concurrent hash tries}, 1164 author ={Prokopec, Aleksandar and Bagwell, Phil and Odersky, Martin}, 1165 institution ={EPFL}, 1166 year ={2011} 1179 1167 } 1180 1168 … … 3940 3928 school = {School of Computer Science, University of Waterloo}, 3941 3929 year = 2003, 3942 optaddress = {Waterloo, Ontario, Canada, N2L 3G1},3930 address = {Waterloo, Ontario, Canada, N2L 3G1}, 3943 3931 note = {\href{http://plg.uwaterloo.ca/theses/BilsonThesis.pdf}{http://\-plg.uwaterloo.ca/\-theses/\-BilsonThesis.pdf}}, 3944 3932 } … … 5222 5210 } 5223 5211 5224 @manual{gcc-nested-func,5225 keywords = {gcc nested functions},5226 contributer = {pabuhr@plg},5227 key = {gcc nested functions},5228 title = {Nested Functions},5229 organization= {{gcc} 9.3 Manual},5230 year = 2019,5231 note = {\href{https://gcc.gnu.org/onlinedocs/gcc-9.3.0/gcc/Nested-Functions.html}{https://\-gcc.gnu.org/\-onlinedocs/\-gcc-9.3.0/\-gcc/\-Nested-Functions.html}},5232 }5233 5234 5212 @article{Haddon77, 5235 5213 keywords = {monitors, nested monitor calls}, … … 7435 7413 } 7436 7414 7437 @manual{POSIX17,7438 keywords = {POSIX, Standard},7439 contributer = {pabuhr@plg},7440 key = {POSIX},7441 title = {1003.1 Standard for Information Technology -- Portable Operating System Interface (POSIX), Base Specifications, Issue 7},7442 organization= {IEEE and The Open Group},7443 year = 2017,7444 note = {\href{https://pubs.opengroup.org/onlinepubs/9699919799}{https://\-pubs.opengroup.org/\-onlinepubs/\-9699919799}},7445 }7446 7447 7415 @inproceedings{ML:NJ, 7448 7416 keywords = {continuations, ML}, -
doc/theses/andrew_beach_MMath/existing.tex
r342af53 r4468a70 83 83 the the call site. 84 84 85 As an example, even if no function named \codeCFA{do _once} is not defined86 near the definition of \codeCFA{do _twice} the following code will work.85 As an example, even if no function named \codeCFA{do\_once} is not defined 86 near the definition of \codeCFA{do\_twice} the following code will work. 87 87 \begin{lstlisting} 88 88 int quadruple(int x) { … … 95 95 \end{lstlisting} 96 96 This is not the recommended way to implement a quadruple function but it 97 does work. The complier will deduce that \codeCFA{do _twice}'s T is an97 does work. The complier will deduce that \codeCFA{do\_twice}'s T is an 98 98 integer from the argument. It will then look for a definition matching the 99 assertion which is the \codeCFA{do _once} defined within the function. That100 function will be passed in as a function pointer to \codeCFA{do _twice} and99 assertion which is the \codeCFA{do\_once} defined within the function. That 100 function will be passed in as a function pointer to \codeCFA{do\_twice} and 101 101 called within it. 102 102 … … 156 156 In \CFA coroutines are created using the \codeCFA{coroutine} keyword which 157 157 works just like \codeCFA{struct} except that the created structure will be 158 modified by the compiler to satify the \codeCFA{is _coroutine} trait.158 modified by the compiler to satify the \codeCFA{is\_coroutine} trait. 159 159 160 160 These structures act as the interface between callers and the coroutine, -
doc/theses/fangren_yu_COOP_S20/Report.tex
r342af53 r4468a70 56 56 57 57 \title{\Huge 58 \lstinline|cfa-cc|Developer's Reference58 cfa-cc Developer's Reference 59 59 }% title 60 60 … … 728 728 The \CFA compiler sets a limit on assertion depth and reports an error if assertion resolution does not terminate within the limit (as for \lstinline[language=C++]@templates@ in \CC). 729 729 730 \addcontentsline{toc}{section}{\refname}731 730 \bibliographystyle{plain} 732 731 \bibliography{pl} -
doc/theses/thierry_delisle_PhD/thesis/Makefile
r342af53 r4468a70 8 8 BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex 9 9 10 MAKEFLAGS = --no-print-directory --silent 10 MAKEFLAGS = --no-print-directory --silent # 11 11 VPATH = ${Build} ${Figures} 12 12 … … 66 66 67 67 build/%.dvi : %.tex Makefile | ${Build} 68 # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.69 if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi70 68 # Must have *.aux file containing citations for bibtex 71 69 if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} $< ; fi … … 76 74 # Make index from *.aux entries and input index at end of document 77 75 -makeglossaries -q -s ${basename $@}.ist ${basename $@} 78 # Make index from *.aux entries and input index at end of document79 -makeindex ${basename $@}.idx80 76 # Run again to finish citations 81 77 ${LaTeX} $< -
doc/theses/thierry_delisle_PhD/thesis/text/core.tex
r342af53 r4468a70 1 1 \chapter{Scheduling Core}\label{core} 2 2 3 Before discussing scheduling in general, where it is important to address systems that are changing states, this document discusses scheduling in a somewhat ideal scen ario, where the system has reached a steady state. For this purpose, a steady state is loosely defined as a state where there are always \glspl{thrd} ready to run and the system has the resources necessary to accomplish the work, \eg, enough workers. In short, the system is neither overloaded nor underloaded.3 Before discussing scheduling in general, where it is important to address systems that are changing states, this document discusses scheduling in a somewhat ideal scenerio, where the system has reached a steady state. For this purpose, a steady state is loosely defined as a state where there are always \glspl{thrd} ready to run and the system has the ressources necessary to accomplish the work, \eg, enough workers. In short, the system is neither overloaded nor underloaded. 4 4 5 I believe it is important to discuss the steady state first because it is the easiest case to handle and, relatedly, the case in which the best performance is to be expected. As such, when the system is either overloaded or underloaded, a common approach is to try to adapt the system to th is new load and return to the steady state, \eg, by adding or removing workers. Therefore, flaws in scheduling the steady state canto be pervasive in all states.5 I believe it is important to discuss the steady state first because it is the easiest case to handle and, relatedly, the case in which the best performance is to be expected. As such, when the system is either overloaded or underloaded, a common approach is to try to adapt the system to the new load and return to the steady state, \eg, adding or removing workers. Flaws in the scheduling when the system is in the steady state can therefore to be pervasive in all states. 6 6 7 7 \section{Design Goals} 8 As with most of the design decisions behind \CFA, an important goal is to match the expectation of the programmer according to their execution mental-model. To match expectations, the design must offer the programmer sufficient guarantees so that, as long as they respect the execution mental-model, the system also respectsthis model.8 As with most of the design decisions behind \CFA, an important goal is to match the expectation of the programmer, according to their probable mental model. To match these expectations, the design must offer the programmers sufficient guarantees so that, as long as they respect the mental model, the system will also respect this model. 9 9 10 For threading, a simple and common execution mental-model is the ``Ideal multi-tasking CPU'' :10 For threading, a simple and common mental model is the ``Ideal multi-tasking CPU'' : 11 11 12 12 \begin{displayquote}[Linux CFS\cit{https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt}] 13 13 {[The]} ``Ideal multi-tasking CPU'' is a (non-existent :-)) CPU that has 100\% physical power and which can run each task at precise equal speed, in parallel, each at [an equal fraction of the] speed. For example: if there are 2 tasks running, then it runs each at 50\% physical power --- i.e., actually in parallel. 14 \label{q:LinuxCFS}15 14 \end{displayquote} 16 15 17 16 Applied to threads, this model states that every ready \gls{thrd} immediately runs in parallel with all other ready \glspl{thrd}. While a strict implementation of this model is not feasible, programmers still have expectations about scheduling that come from this model. 18 17 19 In general, the expectation at the center of this model is that ready \glspl{thrd} do not interfere with each other but simply share the hardware. This assumption makes it easier to reason about threading because ready \glspl{thrd} can be thought ofin isolation and the effect of the scheduler can be virtually ignored. This expectation of \gls{thrd} independence means the scheduler is expected to offer two guarantees:18 In general, the expectation at the center of this model is that ready \glspl{thrd} do not interfere with eachother but simply share the hardware. This makes it easier to reason about threading because ready \glspl{thrd} can be taken in isolation and the effect of the scheduler can be virtually ignored. This expectation of \gls{thrd} independence means the scheduler is expected to offer two guarantees: 20 19 \begin{enumerate} 21 \item A fairness guarantee: a \gls{thrd} that is ready to run is not preventedby another thread.22 \item A performance guarantee: a \gls{thrd} that wants to start or stop running is not preventedby other threads wanting to do the same.20 \item A fairness guarantee: a \gls{thrd} that is ready to run will not be prevented to do so by another thread. 21 \item A performance guarantee: a \gls{thrd} that wants to start or stop running will not be slowed down by other threads wanting to do the same. 23 22 \end{enumerate} 24 23 25 It is important to note that these guarantees are expected only up to a point. \Glspl{thrd} that are ready to run should not be prevented to do so, but they still share the limited hardware resources. Therefore, the guarantee is considered respected if a \gls{thrd} gets access to a \emph{fair share} of the hardware resources, even if that share is very small.24 It is important to note that these guarantees are expected only up to a point. \Glspl{thrd} that are ready to run should not be prevented to do so, but they still need to share a limited amount of hardware. Therefore, the guarantee is considered respected if a \gls{thrd} gets access to a \emph{fair share} of the hardware, even if that share is very small. 26 25 27 Similarly the performance guarantee, the lack of interfer ence among threads, is only relevant up to a point. Ideally, the cost of running and blocking should be constant regardless of contention, but the guarantee is considered satisfied if the cost is not \emph{too high} with or without contention. How much is an acceptable cost is obviously highly variable. For this document, the performance experimentation attempts to show the cost of scheduling is at worst equivalent to existing algorithms used in popular languages. This demonstration can be made by comparing applications built in \CFA to applications built with other languages or other models. Recall programmer expectation is that the impact of the scheduler can be ignored. Therefore, if the cost of scheduling is equivalent to or lower than other popular languages, Iconsider the guarantee achieved.26 Similarly the performance guarantee, the lack of interferance between threads, is only relevant up to a point. Ideally the cost of running and blocking would be constant regardless of contention, but the guarantee is considered satisfied if the cost is not \emph{too high} with or without contention. How much is an acceptable cost is obviously highly variable. For this document the performance experimentation will attempt to show that the cost of scheduling is at worst equivalent to existing algorithms used in popular languages. This demonstration can be made by comparing application built in \CFA to applications built with other languages or other models. Recall from a few paragraphs ago that the expectation of programmers is that the impact of the scheduler can be ignored. Therefore, if the cost of scheduling is equivalent or lower to other popular languages, I will consider the guarantee achieved. 28 27 29 28 More precisely the scheduler should be: 30 29 \begin{itemize} 31 30 \item As fast as other schedulers that are less fair. 32 \item Faster than other scheduler sthat have equal or better fairness.31 \item Faster than other scheduler that have equal or better fairness. 33 32 \end{itemize} 34 33 35 34 \subsection{Fairness vs Scheduler Locality} 36 An important performance factor in modern architectures is cache locality. Waiting for data at lower levels or not present in the cache can have a major impact on performance. Having multiple \glspl{hthrd} writing to the same cache lines also leads to cache lines that must be waited on. It is therefore preferable to divide data among each \gls{hthrd}\footnote{This partitioningcan be an explicit division up front or using data structures where different \glspl{hthrd} are naturally routed to different cache lines.}.35 An important performance factor in modern architectures is cache locality. Waiting for data not present in the cache can have a major impact on performance, and having multiple \glspl{hthrd} writing to the same cache lines can lead to cache lines that need to be waited on again. It is therefore preferable to divide the data among each \gls{hthrd}\footnote{This can be an explicit division up front or using data structures where different \glspl{hthrd} are naturally routed to different cache lines.}. 37 36 38 For a scheduler, having good locality\footnote{This section discusses \emph{internal locality}, \ie, the locality of the data used by the scheduler versus \emph{external locality}, \ie, how the data used by the application is affected by scheduling. External locality is a much more complicated subject and is discussed in part~\ref{Evaluation} on evaluation.}, \ie, having the data local to each \gls{hthrd}, generally conflicts with fairness. Indeed, good locality often requires avoiding the movement of cache lines, while fairness requires dynamically moving a \gls{thrd}, and as consequence cache lines, to a \gls{hthrd} that is currently available.37 For a scheduler, having good locality\footnote{This section discusses \emph{internal} locality, \ie, the locality of the data used by the scheduler. \emph{External locality}, \ie, how the data used by the application is affected by scheduling, is a much more complicated subject and will be discussed in the chapters on evaluation.}, \ie, having the data be local to each \gls{hthrd}, generally conflicts with fairness. Indeed, good locality often requires avoiding the movement of cache lines, while fairness requires dynamically moving \gls{thrd}, and as a consequence cache lines, to \glspl{hthrd} that are currently more appropriate. 39 38 40 However, I claim that in practice it is possible to strike a balance between fairness and performance because the se goals do not necessarily overlap temporally, where Figure~\ref{fig:fair} shows a visual representation of this behaviour. As mentioned, some unfairness is acceptable; therefore it is desirable to have an algorithm that prioritizes cache locality as long as thread delay does not exceed the execution mental-model.39 However, I claim that in practice it is possible to strike a balance between fairness and performance because the need for these do not necessarily overlap temporaly. Figure~\ref{fig:fair} shows an visual representation of this behaviour. As mentionned, a little bit of unfairness can be acceptable, therefore it can be desirable to have an algorithm that prioritizes cache locality as long as no threads is left behind for too long. 41 40 42 41 \begin{figure} 43 \ centering44 \input{fairness.pstex_t}45 \ vspace*{-10pt}46 \caption [Fairness vs Locality graph]{Rule of thumb Fairness vs Locality graph \smallskip\newline The importance of Fairness and Locality while a ready \gls{thrd} awaits running is shown as the time the ready \gls{thrd} waits increases, Ready Time, the chances that its data is still in cache, Locality, decreases. At the same time, the need for fairness increases since other \glspl{thrd} may have the chance to run many times, breaking the fairness model. Since the actual values and curves of this graph can be highly variable, the graph is an idealized representation of the two opposing goals.}42 \begin{center} 43 \input{fairness.pstex_t} 44 \end{center} 45 \caption{Fairness vs Locality} 47 46 \label{fig:fair} 47 Rule of thumb graph: Importance of Fairness and Locality while a ready \gls{thrd} waits run. 48 As the time a ready \gls{thrd} waits increases, ``Ready Time'', the chances that its data is still in cache decreases. At the same time, the need for fairness increases since other \glspl{thrd} may have the chance to run many times, breaking the fairness model mentionned above. Since the actual values and curves of this graph can be highly variable, the graph is left intentionally fuzzy and innacurate. 48 49 \end{figure} 49 50 50 51 \section{Design} 51 In general, a na\"{i}ve \glsxtrshort{fifo} ready-queue does not scale with increased parallelism from \glspl{hthrd}, resulting in decreased performance. The problem is adding/removing \glspl{thrd} is a single point of contention. As shown in the evaluation sections, most production schedulers do scale when adding \glspl{hthrd}. The common solution to the single point of contention is to shard the ready-queue so each \gls{hthrd} can access the ready-queue without contention, increasing performance though lack of contention.52 A naive strictly \glsxtrshort{fifo} ready-queue does not offer sufficient performance. As shown in the evaluation sections, most production schedulers scale when adding multiple \glspl{hthrd} and that is not possible with a single point of contention. Therefore it is vital to shard the ready-queue so that multiple \glspl{hthrd} can access the ready-queue without performance degradation. 52 53 53 54 \subsection{Sharding} \label{sec:sharding} 54 An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm presents a queue with a relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each \emph{cell} of the array has a timestamp for the last operation and a pointer to a linked-list with a lock and each node in the list is marked with a timestamp indicating when it is added to the list. A push operation is done by picking a random cell, acquiring the list lock, and pushing to the list. If the cell is locked, the operation is simply retried on another random cell until a lock is acquired. A pop operation is done in a similar fashion except two random cells are picked. If both cells are unlocked with non-empty lists, the operation pops the node with the oldest cell timestamp. If one of the cells is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new randomcells and tries again.55 An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm represents a queue with relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each cell of the array contains a linked-list with a lock and each node in these list is marked with a timestamp indicating when they were added to the list. Push operations are done by picking a random cell and attempting to push to its list. If the cell is already locked, the operation is simply retried on a new cell until a lock is acquired. Pop operations are done in a similar fashion except two random cells are picked. If both cells are not already locked and both cells contain non-empty lists, the operation pops the node with the oldest timestamp. If only one of the cell is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new cells and tries again. 55 56 56 57 \begin{figure} 57 \centering 58 \input{base.pstex_t} 59 \caption[Relaxed FIFO list]{Relaxed FIFO list \smallskip\newline List at the base of the scheduler: an array of strictly FIFO lists. The timestamp is in all nodes and cell arrays.} 58 \begin{center} 59 \input{base.pstex_t} 60 \end{center} 61 \caption{Relaxed FIFO list} 60 62 \label{fig:base} 63 List at the base of the scheduler: an array of strictly FIFO lists. 64 The timestamp is in all nodes and cell arrays. 61 65 \end{figure} 62 66 63 67 \subsection{Finding threads} 64 Once threads have been distributed onto multiple queues, identifying empty queues becomes a problem. Indeed, if the number of \glspl{thrd} does not far exceed the number of queues, it is probable that several of the cell queues are empty. Figure~\ref{fig:empty} shows an example with 2 \glspl{thrd} running on 8 queues, where the chances of getting an empty queue is 75\% per pick, meaning two random picks yield a \gls{thrd} only half the time. This scenario leads to performance problems since picks that do not yield a \gls{thrd} are not useful and do not necessarily help make more informed guesses. 68 Once threads have been distributed onto multiple queues, indentifying which queues are empty and which are not can become a problem. Indeed, if the number of \glspl{thrd} does not far exceed the number of queues, it is probable that several of these queues are empty. Figure~\ref{fig:empty} shows an example with 2 \glspl{thrd} running on 8 queues, where the chances of getting an empty queue is 75\% per pick, meaning two random picks yield a \gls{thrd} only half the time. 69 65 70 66 71 \begin{figure} 67 \centering 68 \input{empty.pstex_t} 69 \caption[``More empty'' Relaxed FIFO list]{``More empty'' Relaxed FIFO list \smallskip\newline Emptier state of the queue: the array contains many empty cells, that is strictly FIFO lists containing no elements.} 72 \begin{center} 73 \input{empty.pstex_t} 74 \end{center} 75 \caption{``More empty'' Relaxed FIFO list} 70 76 \label{fig:empty} 77 Emptier state of the queue: the array contains many empty cells, that is strictly FIFO lists containing no elements. 71 78 \end{figure} 72 79 73 Th ere are several solutions to this problem, but they ultimately all have to encode if a cell has an empty list. My results show the density and locality of this encoding is generally the dominating factor in these scheme. Classic solutions to this problem use one of three techniques to encode the information:80 This can lead to performance problems since picks that do not yield a \gls{thrd} are not useful and do not necessarily help make more informed guesses. 74 81 75 \paragraph{Dense Information} Figure~\ref{fig:emptybit} shows a dense bitmask to identify the cell queues currently in use. This approach means processors can often find \glspl{thrd} in constant time, regardless of how many underlying queues are empty. Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow searching the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues. However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total amount of ready-queue sharding is limited to the number of bits in the word. With a multi-word bitmask, this maximum limit can be increased arbitrarily, but the look-up time increases. Finally, a dense bitmap, either single or multi-word, causes additional contention problems that reduces performance because of cache misses after updates. This central update bottleneck also means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available \glspl{thrd} but none on queue when the subsequent atomic check is done. 82 Solutions to this problem can take many forms, but they ultimately all have to encode where the threads are in some form. My results show that the density and locality of this encoding is generally the dominating factor in these scheme. Classic solutions to this problem use one of three techniques to encode the information: 76 83 77 84 \begin{figure} 78 \centering 85 \begin{center} 86 {\resizebox{0.73\textwidth}{!}{\input{emptybit.pstex_t}}} 87 \end{center} 79 88 \vspace*{-5pt} 80 {\resizebox{0.75\textwidth}{!}{\input{emptybit.pstex_t}}} 89 \caption{Underloaded queue with added bitmask to indicate which array cells have items.} 90 \label{fig:emptybit} 91 \begin{center} 92 {\resizebox{0.73\textwidth}{!}{\input{emptytree.pstex_t}}} 93 \end{center} 81 94 \vspace*{-5pt} 82 \caption [Underloaded queue with bitmask]{Underloaded queue with bitmask indicating array cells withitems.}83 \label{fig:empty bit}84 85 \vspace*{10pt}86 {\resizebox{0.75\textwidth}{!}{\input{emptytree.pstex_t}}}95 \caption{Underloaded queue with added binary search tree indicate which array cells have items.} 96 \label{fig:emptytree} 97 \begin{center} 98 {\resizebox{0.9\textwidth}{!}{\input{emptytls.pstex_t}}} 99 \end{center} 87 100 \vspace*{-5pt} 88 \caption[Underloaded queue with binary search-tree]{Underloaded queue with binary search-tree indicating array cells with items.} 89 \label{fig:emptytree} 90 91 \vspace*{10pt} 92 {\resizebox{0.95\textwidth}{!}{\input{emptytls.pstex_t}}} 93 \vspace*{-5pt} 94 \caption[Underloaded queue with per processor bitmask]{Underloaded queue with per processor bitmask indicating array cells with items.} 101 \caption{Underloaded queue with added per processor bitmask to indicate which array cells have items.} 95 102 \label{fig:emptytls} 96 103 \end{figure} 97 104 98 \paragraph{ Sparse Information} Figure~\ref{fig:emptytree} shows an approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}. However, this approach may lead to poorer performance due to the inherent pointer chasing cost while still allowing significant contention on the nodes of the tree if the tree is shallow.105 \paragraph{Dense Information} Figure~\ref{fig:emptybit} shows a dense bitmask to identify which inner queues are currently in use. This approach means processors can often find \glspl{thrd} in constant time, regardless of how many underlying queues are empty. Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow using the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues. However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word. With a multi-word bitmask, this maximum limit can be increased arbitrarily, but the look-up will nolonger be constant time. Finally, a dense bitmap, either single or multi-word, causes additional contention problems which reduces performance because of cache misses after updates. This central update bottleneck also means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available \glspl{thrd} but none on queue. 99 106 100 \paragraph{ Local Information} Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but each \gls{hthrd} keeps its own independent copy. While this approach can offer good scalability \emph{and} low latency, the liveliness and discovery of the information can become a problem. This case is made worst in systems with few processors where even blind random picks can find \glspl{thrd} in a few tries.107 \paragraph{Sparse Information} Figure~\ref{fig:emptytree} shows an approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}. However, this approach may lead to poorer performance due to the inherent pointer chasing cost while still allowing more contention on the nodes of the tree if the tree is not deep enough. 101 108 102 I built a prototype of these approaches and none of these techniques offer satisfying performance when few threads are present. All of these approach hit the same 2 problems. First, randomly picking sub-queues is very fast but means any improvement to the hit rate can easily be countered by a slow-down in look-up speed when there are empty lists. Second, the array is already as sharded to avoid contention bottlenecks, so any denser data structure tends to become a bottleneck. In all cases, these factors meant the best cases scenario, \ie many threads, would get worst throughput, and the worst-case scenario, few threads, would get a better hit rate, but an equivalent poor throughput. As a result I tried an entirely different approach. 109 \paragraph{Local Information} Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but have each thread keep its own independent copy of it. While this approach can offer good scalability \emph{and} low latency, the liveliness and discovery of the information can become a problem. This case is made worst in systems with few processors where even blind random picks can find \glspl{thrd} in few tries. 110 111 I built a prototype of these approach and none of these techniques offer satisfying performance when few threads are present. All of these approach hit the same 2 problems. First, blindly picking two sub-queues is very fast which means that any improvement to the hit rate can easily be countered by a slow-down in look-up speed. Second, the array is already as sharded as is needed to avoid contention bottlenecks, so any denser data structure will tend to become a bottleneck. In all cases, these factors meant that the best cases scenerio, many threads, would get worst throughput and the worst case scenario, few threads, would get a better hit rate, but an equivalent throughput. As a result I tried an entirely different approach. 103 112 104 113 \subsection{Dynamic Entropy}\cit{https://xkcd.com/2318/} 105 In the worst -case scenario there are only few \glspl{thrd} ready to run, or more precisely given $P$ \glspl{proc}\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}, $T$ \glspl{thrd} and $\epsilon$ a very small number, than the worst case scenario can be represented by $\epsilon \ll P$, than $T = P + \epsilon$. It is important to note in this case that fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' on page \pageref{q:LinuxCFS}. In this context, it is possible to use a purely internal-locality based approach and still meet the fairness requirements. This approach simply has each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} pushes to a given sub-queue and then popes from the \emph{same} subqueue. In cases where $T \gg P$, the scheduler should also achieves similar performance without affecting the fairness guarantees.114 In the worst case scenario there are few \glspl{thrd} ready to run, or more accuratly given $P$ \glspl{proc}, $T$ \glspl{thrd} and $\epsilon$, as usual, a very small number, in this case $\epsilon \ll P$, we have $T = P + \epsilon$. An important thing to note is that in this case, fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' presented in this chapter\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}. Therefore, in this context it is possible to use a purely internal locality based approach and still meet the fairness requirements. This approach would simply have each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} would push to a given sub-queue and then pop from the \emph{same} subqueue. Ideally, the scheduler would achieve this without affecting the fairness guarantees in cases where $T \gg P$. 106 115 107 To handle this case, I use a pseudo random-number generator, \glsxtrshort{prng} in a novel way. When the scheduler uses a \glsxtrshort{prng} instance per \gls{proc} exclusively, the random-number seed effectively starts an encoding that produces a list of all accessed subqueues, from latest to oldest. The novel approach is to be able to ``replay'' the \glsxtrshort{prng} backwards and there exist \glsxtrshort{prng}s that are fast, compact \emph{and} can be run forward and backwards. Linear congruential generators~\cite{wiki:lcg} are an example of \glsxtrshort{prng}s that match these requirements.116 To achieve this, I use a communication channel I have not mentionned yet and which I believe I use in a novel way : the \glsxtrshort{prng}. If the scheduler has a \glsxtrshort{prng} instance per \gls{proc} exclusively used for scheduling, its seed effectively encodes a list of all the accessed subqueues, from the latest to the oldest. The only requirement to achieve this is to be able to ``replay'' the \glsxtrshort{prng} backwards. As it turns out, this is an entirely reasonnable requirement and there already exist \glsxtrshort{prng}s that are fast, compact \emph{and} can be run forward and backwards. Linear congruential generators\cite{wiki:lcg} are an example of \glsxtrshort{prng}s that match these requirements. 108 117 109 The algorithm works as follows :118 The algorithm works as follows : 110 119 \begin{itemize} 111 120 \item Each \gls{proc} has two \glsxtrshort{prng} instances, $F$ and $B$. 112 \item Push and Pop operations occur as discussed in Section~\ref{sec:sharding} with the following exceptions:121 \item Push and Pop operations happen as mentionned in Section~\ref{sec:sharding} with the following exceptions: 113 122 \begin{itemize} 114 123 \item Push operations use $F$ going forward on each try and on success $F$ is copied into $B$. … … 117 126 \end{itemize} 118 127 119 The main benefit of this technique is that it basically re spects the desired properties of Figure~\ref{fig:fair}. When looking for work, a \gls{proc} first looks at the last cell they pushed to, if any, and then move backwards through its accessed cells. As the \gls{proc} continues looking for work, $F$ moves backwards and $B$ stays in place. As a result,the relation between the two becomes weaker, which means that the probablisitic fairness of the algorithm reverts to normal. Chapter~\ref{proofs} discusses more formally the fairness guarantees of this algorithm.128 The main benefit of this technique is that it basically repects the desired properties of Figure~\ref{fig:fair}. When looking for work, \glspl{proc} will look first at the last cells they pushed to, if any, and then move backwards through the cells. As the \glspl{proc} continue looking for work, $F$ moves back and $B$ stays in place. As a result the relation between the two becomes weaker, which means that the probablisitic fairness of the algorithm reverts to normal. Chapter~\ref{proofs} discusses more formally the fairness guarantees of this algorithm. 120 129 121 130 \section{Details} -
doc/theses/thierry_delisle_PhD/thesis/text/io.tex
r342af53 r4468a70 5 5 Since \glsxtrshort{io} operations are generally handled by the 6 6 7 \subsection{\ lstinline|epoll|, \lstinline|poll| and \lstinline|select|}7 \subsection{\texttt{epoll}, \texttt{poll} and \texttt{select}} 8 8 9 9 \subsection{Linux's AIO} … … 33 33 34 34 \subsection{\texttt{io\_uring}} 35 A very recent addition to Linux, @io_uring@\cit{io\_uring} is a framework that aims to solve many of the problems listed with the above mentioned solutions.35 A very recent addition to Linux, \texttt{io\_uring}\cit{io\_uring} is a framework that aims to solve many of the problems listed with the above mentioned solutions. 36 36 37 37 \subsection{Extra Kernel Threads}\label{io:morethreads} -
doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex
r342af53 r4468a70 1 1 \chapter{\CFA Runtime} 2 This chapter presents an overview of the capabilities of the \CFA runtime prior to this thesis work.2 This chapter offers an overview of the capabilities of the \CFA runtime prior to this work. 3 3 4 \Celeven introduced threading features, such the @_Thread_local@ storage class, and libraries @stdatomic.h@ and @threads.h@. Interestingly, almost a decade after the \Celeven standard, the most recent versions of gcc, clang, and msvc do not support the \Celeven include @threads.h@, indicating no interest in the C11 concurrency approach (possibly because of the recent effort to add concurrency to \CC). While the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}, as for \CC. This model uses \glspl{kthrd} to achieve parallelism and concurrency. In this model, every thread of computation maps to an object in the kernel. The kernel then has the responsibility of managing these threads, \eg creating, scheduling, blocking. This also entails that the kernel has a perfect view of every thread executing in the system\footnote{This is not completely true due to primitives like \lstinline|futex|es, which have a significant portion of their logic in user space.}.4 Threading in \CFA offers is based on \Gls{uthrding}, where \glspl{thrd} are the representation of a unit of work. As such, \CFA programmers should expect these units to be fairly inexpensive, that is: programmers should be able to create a large number of \glspl{thrd} and switch between \glspl{thrd} liberally without many concerns for performance. 5 5 6 6 \section{M:N Threading}\label{prev:model} 7 7 8 Threading in \CFA is based on \Gls{uthrding}, where \glspl{thrd} are the representation of a unit of work. As such, \CFA programmers should expect these units to be fairly inexpensive, \ie programmers should be able to create a large number of \glspl{thrd} and switch among \glspl{thrd} liberally without many concerns for performance.8 C traditionnally uses a 1:1 threading model. This model uses \glspl{kthrd} to achive parallelism and concurrency. In this model, every thread of computation maps to an object in the kernel. The kernel then has the responsibility of managing these threads, \eg creating, scheduling, blocking. This also entails that the kernel has a perfect view of every thread executing in the system\footnote{This is not completly true due to primitives like \texttt{futex}es, which have a significant portion of their logic in user space.}. 9 9 10 The \CFA M:N threading models is implemented using many user-level threads mapped onto fewer \glspl{kthrd}. The user-level threads have the same semantic meaning as a \glspl{kthrd} in the 1:1 model: they represent an independent thread of execution with its own stack. The difference is that user-level threads do not have a corresponding object in the kernel, they are handled by the runtime in user space and scheduled onto \glspl{kthrd}, referred to as \glspl{proc} in this document. \Glspl{proc} run a \gls{thrd} until it context switches out, it then chooses a different \gls{thrd} to run.10 By contrast \CFA uses an M:N threading models, where concurrency is achieved using many user-level threads mapped onto fewer \glspl{kthrd}. The user-level threads have the same semantic meaning as a \glspl{kthrd} in the 1:1 model, they represent an independant thread of execution with it's on stack. The difference is that user-level threads do not have a corresponding object in the kernel, they are handled by the runtime in user space and scheduled onto \glspl{kthrd}, referred to as \glspl{proc} in this document. \Glspl{proc} run a \gls{thrd} until it context switches out, it then choses a different \gls{thrd} to run. 11 11 12 12 \section{Clusters} 13 \CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} are only be scheduled onto \glspl{proc} in the same cluster and scheduling is done independently of other clusters. Figure~\ref{fig:system} shows an overview of the \CFA runtime, which allows programmers to tightly control parallelism. It also opens the door to handling effects like NUMA, by pining clusters to a specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}.14 15 13 \begin{figure} 16 14 \begin{center} 17 15 \input{system.pstex_t} 18 16 \end{center} 19 \caption [Overview of the \CFA runtime]{Overview of the \CFA runtime \newline \Glspl{thrd} are scheduled inside a particular cluster, where it only runs on the \glspl{proc} which belong to the cluster. The discrete-event manager, which handles preemption and timeout, is a \gls{kthrd} which lives outside any cluster and does not run \glspl{thrd}.}17 \caption{Overview of the \CFA runtime} 20 18 \label{fig:system} 19 \Glspl{thrd} are scheduled inside a particular cluster, where it only runs on the \glspl{proc} which belong to the cluster. The discrete-event manager, which handles preemption and timeout, is a \gls{kthrd} which lives outside any cluster and does not run \glspl{thrd}. 21 20 \end{figure} 21 \CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} will only be scheduled onto \glspl{proc} in the same cluster and scheduling is done independantly of other clusters. Figure~\ref{fig:system} shows an overview if this system. This allows programmers to control more tightly parallelism. It also opens the door to handling effects like NUMA, by pining clusters to specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}. 22 22 23 23 \section{Scheduling} 24 The \CFA runtime previously used a \glsxtrshort{fifo} ready-queue with a single lock. This setup offers perfect fairness in terms of opportunities to run. However, it offers poor scalability, since the performance of the ready queue can never be improved by adding more \glspl{hthrd} andcontention can cause significant performance degradation.24 The \CFA runtime was previously using a strictly \glsxtrshort{fifo} ready queue with a single lock. This setup offers perfect fairness in terms of opportunities to run/ However, it offers poor scalability, since the performance of the ready queue can never be improved by adding more \glspl{hthrd}, but the contention can cause significant performance degradation. 25 25 26 26 \section{\glsxtrshort{io}}\label{prev:io} 27 Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. %\CFA being built on C, this means that, 28 While all I/O operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model~\cite{pthreads}. Using these 1:1 threading operations in an M:N threading model means I/O operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. It also means deadlock can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows: 29 \begin{quote} 30 Given a simple network program with 2 \glspl{thrd} and a single \gls{proc}, one \gls{thrd} sends network requests to a server and the other \gls{thrd} waits for a response from the server. If the second \gls{thrd} races ahead, it may wait for responses to requests that have not been sent yet. In theory, this should not be a problem, even if the second \gls{thrd} waits, because the first \gls{thrd} is still ready to run and should be able to get CPU time to send the request. With M:N threading, while the first \gls{thrd} is ready, the lone \gls{proc} \emph{cannot} run the first \gls{thrd} if it is blocked in the \glsxtrshort{io} operation of the second \gls{thrd}. If this happen, the system is in a synchronization deadlock\footnote{In this example, the deadlocked could be resolved if the server sends unprompted messages to the client. However, this solution is not general and may not be appropriate even in this simple case.}. 31 \end{quote} 32 Therefore, one of the objective of this work is to introduce \emph{User-Level \glsxtrshort{io}}, like \glslink{uthrding}{User-Level \emph{Threading}} blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations, which entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple I/O operations in parallel. This requirement cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its blocking duration. Executing I/O operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} does not block. 27 Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. \CFA being built on C, this means that, while all the operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model\cit{pthreads}. Using these operations in a M:N threading model, when they are built for 1:1 threading, means that operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. This also means that deadlocks can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows: 33 28 34 \section{Interoperating with C} 35 While \glsxtrshort{io} operations are the classical example of operations that block \glspl{kthrd}, the non-blocking challenge extends to all blocking system-calls. The POSIX standard states~\cite[\S~2.9.1]{POSIX17}: 36 \begin{quote} 37 All functions defined by this volume of POSIX.1-2017 shall be thread-safe, except that the following functions1 need not be thread-safe. ... (list of 70+ potentially excluded functions) 38 \end{quote} 39 Only UNIX @man@ pages identify whether or not a library function is thread safe, and hence, may block on a pthread lock or system call; hence interoperability with UNIX library functions is a challenge for an M:N threading model. 29 Given a simple network program with 2 \glspl{thrd} and a single \gls{proc}, one \gls{thrd} sends network requests to a server and the other \gls{thrd} waits for response from the server. If the second \gls{thrd} races ahead, it may wait for responses to requests that have not been sent yet. In theory, this should not be a problem, even if the second \gls{thrd} waits, the first \gls{thrd} is still ready to run and should just be able to get CPU time and send the request. In practice with M:N threading, while the first \gls{thrd} is ready, the lone \gls{proc} in this example will \emph{not} try to run the first \gls{thrd} if it is blocked in the \glsxtrshort{io} operation of the second \gls{thrd}. If this happen, the system is effectively deadlocked\footnote{In this example, the deadlocked could be resolved if the server sends unprompted messages to the client. However, this solution is not general and may not be appropriate even in this simple case.}. 40 30 41 Languages like Go and Java, which have strict interoperability with C\cit{JNI, GoLang with C}, can control operations in C by ``sandboxing'' them, \eg a blocking function may be delegated to a \gls{kthrd}. Sandboxing may help towards guaranteeing that the kind of deadlock mentioned above does not occur.31 One of the objective of this work, is to introduce \emph{User-Level \glsxtrshort{io}} which, as a parallel to \glslink{uthrding}{User-Level \emph{Threading}}, blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations. This entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple operations in parallel. This cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its duration. Executing operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} is not blocked. 42 32 43 As mentioned in Section~\ref{intro}, \CFA is binary compatible with C and, as such, must support all C library functions. Furthermore, interoperability can happen at the function-call level, inline code, or C and \CFA translation units linked together. This fine-grained interoperability between C and \CFA has two consequences: 33 \section{Interoperating with \texttt{C}} 34 While \glsxtrshort{io} operations are the classical example of operations that block \glspl{kthrd}, the challenges mentioned in the previous section do not require \glsxtrshort{io} to be involved. These challenges are a product of blocking system calls rather than \glsxtrshort{io}. C offers no tools to identify whether or not a librairy function will lead to a blocking system call. This fact means interoperatability with C becomes a challenge in a M:N threading model. 35 36 Languages like Go and Java, which have strict interoperatability with C\cit{JNI, GoLang with C}, can control operations in C by ``sandboxing'' them. They can, for example, delegate C operations to \glspl{kthrd} that are not \glspl{proc}. Sandboxing may help towards guaranteeing that the deadlocks mentioned in the previous section do not occur. 37 38 As mentioned in Section~\cit{\CFA intro}, \CFA is binary compatible with C and, as such, trivially supports calls to and from C librairies. Furthermore, interoperatability can happen within a single library, through inline code or simply C and \CFA translation units archived together. The fine-grained interoperatability between C and \CFA has two consequences: 44 39 \begin{enumerate} 45 \item Precisely identifying blocking C callsis difficult.46 \item Introducing new code canhave a significant impact on general performance.40 \item Precisely identifying C calls that could block is difficult. 41 \item Introducing code where interoperatability occurs could have a significant impact on general performance. 47 42 \end{enumerate} 48 Because of these consequences, this work does not attempt to ``sandbox'' calls to C. Therefore, it is possible for an unidentified library calls to block a \gls{kthrd} leading to deadlocks in \CFA's M:N threading model, which would not occur in a traditional 1:1 threading model. Currently, all M:N thread systems interacting with UNIX without sandboxing suffer from this problem but manage to work very well in the majority of applications. Therefore, a complete solution to this problem is outside the scope of this thesis. 43 44 Because of these consequences, this work does not attempt to ``sandbox'' calls to C. It is possible that conflicting calls to C could lead to deadlocks on \CFA's M:N threading model where they would not in the traditionnal 1:1 threading model. However, I judge that solving this problem in general, in a way that is composable and flexible, is too complex in itself and would add too much work to this thesis. Therefore it is outside the scope of this thesis. -
doc/theses/thierry_delisle_PhD/thesis/thesis.tex
r342af53 r4468a70 120 120 % although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and 121 121 % installation instructions there. 122 \renewcommand*{\glstextformat}[1]{\textsf{#1}}123 122 124 123 \usepackage{csquotes} … … 126 125 127 126 % Setting up the page margins... 128 \setlength{\textheight}{9in}\setlength{\topmargin}{-0.45in}\setlength{\headsep}{0.25in}129 127 % uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the 130 128 % top, bottom, and outside page edges and a 1.125 in. (81pt) gutter … … 193 191 % cfa macros used in the document 194 192 \input{common} 195 \CFAStyle % CFA code-style for all languages196 \lstset{basicstyle=\linespread{0.9}\tt}197 193 198 194 % glossary of terms to use 199 195 \input{glossary} 200 \makeindex201 196 202 197 %====================================================================== … … 235 230 \input{text/io.tex} 236 231 \part{Evaluation} 237 \label{Evaluation}238 232 \chapter{Theoretical and Existance Proofs} 239 233 \chapter{Micro-Benchmarks} … … 262 256 \addcontentsline{toc}{chapter}{\textbf{References}} 263 257 264 \bibliography{local ,pl}258 \bibliography{local} 265 259 % Tip 5: You can create multiple .bib files to organize your references. 266 260 % Just list them all in the \bibliogaphy command, separated by commas (no spaces). … … 307 301 \printglossary 308 302 \cleardoublepage 309 310 % Index311 % -----------------------------312 %\input{thesis.ind} % index313 314 303 \phantomsection 315 304 -
example/io/simple/server_epoll.c
r342af53 r4468a70 88 88 } 89 89 90 ev.events = EPOLL OUT | EPOLLIN | EPOLLONESHOT;90 ev.events = EPOLLIN | EPOLLONESHOT; 91 91 ev.data.u64 = (uint64_t)˚ 92 92 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev) == -1) { … … 99 99 100 100 while(1) { 101 BLOCK: ;101 BLOCK: 102 102 int nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1); 103 103 if (nfds == -1) { -
libcfa/src/Makefile.am
r342af53 r4468a70 58 58 concurrency/iofwd.hfa \ 59 59 containers/list.hfa \ 60 containers/stackLockFree.hfa \ 61 vec/vec.hfa \ 62 vec/vec2.hfa \ 63 vec/vec3.hfa \ 64 vec/vec4.hfa 60 containers/stackLockFree.hfa 65 61 66 62 inst_headers_src = \ … … 98 94 concurrency/clib/cfathread.h \ 99 95 concurrency/invoke.h \ 100 concurrency/future.hfa \101 96 concurrency/kernel/fwd.hfa 102 97 -
libcfa/src/bits/locks.hfa
r342af53 r4468a70 283 283 void ^?{}(future_t &) {} 284 284 285 void reset(future_t & this) {286 // needs to be in 0p or 1p287 __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);288 }289 290 285 // check if the future is available 291 286 bool available( future_t & this ) { … … 345 340 346 341 // Mark the future as abandoned, meaning it will be deleted by the server 347 bool abandon( future_t & this ) { 348 /* paranoid */ verify( this.ptr != 3p ); 349 350 // Mark the future as abandonned 342 void abandon( future_t & this ) { 351 343 struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST); 352 353 // If the future isn't already fulfilled, let the server delete it354 if( got == 0p ) return false;355 344 356 345 // got == 2p: the future is ready but the context hasn't fully been consumed … … 358 347 if( got == 2p ) { 359 348 while( this.ptr != 1p ) Pause(); 360 got = 1p; 361 } 362 363 // The future is completed delete it now 364 /* paranoid */ verify( this.ptr != 1p ); 365 free( &this ); 366 return true; 349 } 350 return; 367 351 } 368 352 -
libcfa/src/concurrency/io.cfa
r342af53 r4468a70 31 31 32 32 extern "C" { 33 #include <sys/epoll.h> 33 34 #include <sys/syscall.h> 34 35 … … 40 41 #include "kernel/fwd.hfa" 41 42 #include "io/types.hfa" 42 43 static const char * opcodes[] = {44 "OP_NOP",45 "OP_READV",46 "OP_WRITEV",47 "OP_FSYNC",48 "OP_READ_FIXED",49 "OP_WRITE_FIXED",50 "OP_POLL_ADD",51 "OP_POLL_REMOVE",52 "OP_SYNC_FILE_RANGE",53 "OP_SENDMSG",54 "OP_RECVMSG",55 "OP_TIMEOUT",56 "OP_TIMEOUT_REMOVE",57 "OP_ACCEPT",58 "OP_ASYNC_CANCEL",59 "OP_LINK_TIMEOUT",60 "OP_CONNECT",61 "OP_FALLOCATE",62 "OP_OPENAT",63 "OP_CLOSE",64 "OP_FILES_UPDATE",65 "OP_STATX",66 "OP_READ",67 "OP_WRITE",68 "OP_FADVISE",69 "OP_MADVISE",70 "OP_SEND",71 "OP_RECV",72 "OP_OPENAT2",73 "OP_EPOLL_CTL",74 "OP_SPLICE",75 "OP_PROVIDE_BUFFERS",76 "OP_REMOVE_BUFFERS",77 "OP_TEE",78 "INVALID_OP"79 };80 43 81 44 // returns true of acquired as leader or second leader … … 171 134 int ret = 0; 172 135 if( need_sys_to_submit || need_sys_to_complete ) { 173 __cfadbg_print_safe(io_core, "Kernel I/O : IO_URING enter %d %u %u\n", ring.fd, to_submit, flags);174 136 ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, (sigset_t *)0p, _NSIG / 8); 175 137 if( ret < 0 ) { … … 195 157 static unsigned __collect_submitions( struct __io_data & ring ); 196 158 static __u32 __release_consumed_submission( struct __io_data & ring ); 197 static inline void __clean( volatile struct io_uring_sqe * sqe ); 159 160 static inline void process(struct io_uring_cqe & cqe ) { 161 struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data; 162 __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future ); 163 164 fulfil( *future, cqe.res ); 165 } 198 166 199 167 // Process a single completion message from the io_uring 200 168 // This is NOT thread-safe 201 static inline void process( volatile struct io_uring_cqe & cqe ) {202 struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data;203 __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future );204 205 fulfil( *future, cqe.res );206 }207 208 169 static [int, bool] __drain_io( & struct __io_data ring ) { 209 170 /* paranoid */ verify( ! __preemption_enabled() ); … … 231 192 } 232 193 233 __atomic_thread_fence( __ATOMIC_SEQ_CST );234 235 194 // Release the consumed SQEs 236 195 __release_consumed_submission( ring ); … … 250 209 for(i; count) { 251 210 unsigned idx = (head + i) & mask; 252 volatilestruct io_uring_cqe & cqe = ring.completion_q.cqes[idx];211 struct io_uring_cqe & cqe = ring.completion_q.cqes[idx]; 253 212 254 213 /* paranoid */ verify(&cqe); … … 259 218 // Mark to the kernel that the cqe has been seen 260 219 // Ensure that the kernel only sees the new value of the head index after the CQEs have been read. 261 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_SEQ_CST ); 220 __atomic_thread_fence( __ATOMIC_SEQ_CST ); 221 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED ); 262 222 263 223 return [count, count > 0 || to_submit > 0]; … … 265 225 266 226 void main( $io_ctx_thread & this ) { 267 __ioctx_register( this );268 269 __cfadbg_print_safe(io_core, "Kernel I/O : IO poller %d (%p) ready\n", this.ring->fd, &this); 270 271 const int reset_cnt = 5; 272 int reset = reset_cnt;227 epoll_event ev; 228 __ioctx_register( this, ev ); 229 230 __cfadbg_print_safe(io_core, "Kernel I/O : IO poller %p for ring %p ready\n", &this, &this.ring); 231 232 int reset = 0; 273 233 // Then loop until we need to start 274 LOOP:275 234 while(!__atomic_load_n(&this.done, __ATOMIC_SEQ_CST)) { 276 235 // Drain the io … … 280 239 [count, again] = __drain_io( *this.ring ); 281 240 282 if(!again) reset --;241 if(!again) reset++; 283 242 284 243 // Update statistics … … 290 249 291 250 // If we got something, just yield and check again 292 if(reset > 1) {251 if(reset < 5) { 293 252 yield(); 294 continue LOOP; 295 } 296 297 // We alread failed to find completed entries a few time. 298 if(reset == 1) { 299 // Rearm the context so it can block 300 // but don't block right away 301 // we need to retry one last time in case 302 // something completed *just now* 303 __ioctx_prepare_block( this ); 304 continue LOOP; 305 } 306 253 } 254 // We didn't get anything baton pass to the slow poller 255 else { 307 256 __STATS__( false, 308 257 io.complete_q.blocks += 1; 309 258 ) 310 __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %d (%p)\n", this.ring->fd, &this); 259 __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self); 260 reset = 0; 311 261 312 262 // block this thread 263 __ioctx_prepare_block( this, ev ); 313 264 wait( this.sem ); 314 315 // restore counter 316 reset = reset_cnt; 317 } 318 319 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller %d (%p) stopping\n", this.ring->fd, &this); 265 } 266 } 267 268 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring); 320 269 } 321 270 … … 340 289 // 341 290 342 // Allocate an submit queue entry. 343 // The kernel cannot see these entries until they are submitted, but other threads must be 344 // able to see which entries can be used and which are already un used by an other thread 345 // for convenience, return both the index and the pointer to the sqe 346 // sqe == &sqes[idx] 347 [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) { 291 [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) { 348 292 /* paranoid */ verify( data != 0 ); 349 293 … … 360 304 // Look through the list starting at some offset 361 305 for(i; cnt) { 362 __u64 expected = 3;363 __u32 idx = (i + off) & mask; // Get an index from a random364 volatilestruct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];306 __u64 expected = 0; 307 __u32 idx = (i + off) & mask; 308 struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx]; 365 309 volatile __u64 * udata = &sqe->user_data; 366 310 367 // Allocate the entry by CASing the user_data field from 0 to the future address368 311 if( *udata == expected && 369 312 __atomic_compare_exchange_n( udata, &expected, data, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) … … 376 319 ) 377 320 378 // debug log379 __cfadbg_print_safe( io, "Kernel I/O : allocated [%p, %u] for %p (%p)\n", sqe, idx, active_thread(), (void*)data );380 321 381 322 // Success return the data … … 384 325 verify(expected != data); 385 326 386 // This one was used387 327 len ++; 388 328 } 389 329 390 330 block++; 391 392 abort( "Kernel I/O : all submit queue entries used, yielding\n" );393 394 331 yield(); 395 332 } … … 440 377 void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) { 441 378 __io_data & ring = *ctx->thrd.ring; 442 443 {444 __attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];445 __cfadbg_print_safe( io,446 "Kernel I/O : submitting %u (%p) for %p\n"447 " data: %p\n"448 " opcode: %s\n"449 " fd: %d\n"450 " flags: %d\n"451 " prio: %d\n"452 " off: %p\n"453 " addr: %p\n"454 " len: %d\n"455 " other flags: %d\n"456 " splice fd: %d\n"457 " pad[0]: %llu\n"458 " pad[1]: %llu\n"459 " pad[2]: %llu\n",460 idx, sqe,461 active_thread(),462 (void*)sqe->user_data,463 opcodes[sqe->opcode],464 sqe->fd,465 sqe->flags,466 sqe->ioprio,467 sqe->off,468 sqe->addr,469 sqe->len,470 sqe->accept_flags,471 sqe->splice_fd_in,472 sqe->__pad2[0],473 sqe->__pad2[1],474 sqe->__pad2[2]475 );476 }477 478 479 379 // Get now the data we definetely need 480 380 volatile __u32 * const tail = ring.submit_q.tail; … … 543 443 unlock(ring.submit_q.submit_lock); 544 444 #endif 545 if( ret < 0 ) { 546 return; 547 } 445 if( ret < 0 ) return; 548 446 549 447 // Release the consumed SQEs … … 556 454 io.submit_q.submit_avg.cnt += 1; 557 455 ) 558 559 __cfadbg_print_safe( io, "Kernel I/O : submitted %u (among %u) for %p\n", idx, ret, active_thread() ); 560 } 561 else 562 { 456 } 457 else { 563 458 // get mutual exclusion 564 459 #if defined(LEADER_LOCK) … … 568 463 #endif 569 464 570 /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 3ul64,465 /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0, 571 466 /* paranoid */ "index %u already reclaimed\n" 572 467 /* paranoid */ "head %u, prev %u, tail %u\n" … … 595 490 } 596 491 597 /* paranoid */ verify(ret == 1);598 599 492 // update statistics 600 493 __STATS__( false, … … 603 496 ) 604 497 605 {606 __attribute__((unused)) volatile __u32 * const head = ring.submit_q.head;607 __attribute__((unused)) __u32 last_idx = ring.submit_q.array[ ((*head) - 1) & mask ];608 __attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[last_idx];609 610 __cfadbg_print_safe( io,611 "Kernel I/O : last submitted is %u (%p)\n"612 " data: %p\n"613 " opcode: %s\n"614 " fd: %d\n"615 " flags: %d\n"616 " prio: %d\n"617 " off: %p\n"618 " addr: %p\n"619 " len: %d\n"620 " other flags: %d\n"621 " splice fd: %d\n"622 " pad[0]: %llu\n"623 " pad[1]: %llu\n"624 " pad[2]: %llu\n",625 last_idx, sqe,626 (void*)sqe->user_data,627 opcodes[sqe->opcode],628 sqe->fd,629 sqe->flags,630 sqe->ioprio,631 sqe->off,632 sqe->addr,633 sqe->len,634 sqe->accept_flags,635 sqe->splice_fd_in,636 sqe->__pad2[0],637 sqe->__pad2[1],638 sqe->__pad2[2]639 );640 }641 642 __atomic_thread_fence( __ATOMIC_SEQ_CST );643 498 // Release the consumed SQEs 644 499 __release_consumed_submission( ring ); 645 // ring.submit_q.sqes[idx].user_data = 3ul64;646 500 647 501 #if defined(LEADER_LOCK) … … 651 505 #endif 652 506 653 __cfadbg_print_safe( io, "Kernel I/O : submitted %u for %p\n", idx, active_thread());507 __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret ); 654 508 } 655 509 } 656 510 657 511 // #define PARTIAL_SUBMIT 32 658 659 // go through the list of submissions in the ready array and moved them into660 // the ring's submit queue661 512 static unsigned __collect_submitions( struct __io_data & ring ) { 662 513 /* paranoid */ verify( ring.submit_q.ready != 0p ); … … 699 550 } 700 551 701 // Go through the ring's submit queue and release everything that has already been consumed702 // by io_uring703 552 static __u32 __release_consumed_submission( struct __io_data & ring ) { 704 553 const __u32 smask = *ring.submit_q.mask; 705 554 706 // We need to get the lock to copy the old head and new head707 555 if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0; 708 __attribute__((unused)) 709 __u32 ctail = *ring.submit_q.tail; // get the current tail of the queue 710 __u32 chead = *ring.submit_q.head; // get the current head of the queue 711 __u32 phead = ring.submit_q.prev_head; // get the head the last time we were here 712 ring.submit_q.prev_head = chead; // note up to were we processed 556 __u32 chead = *ring.submit_q.head; 557 __u32 phead = ring.submit_q.prev_head; 558 ring.submit_q.prev_head = chead; 713 559 unlock(ring.submit_q.release_lock); 714 560 715 // the 3 fields are organized like this diagram716 // except it's are ring717 // ---+--------+--------+----718 // ---+--------+--------+----719 // ^ ^ ^720 // phead chead ctail721 722 // make sure ctail doesn't wrap around and reach phead723 /* paranoid */ verify(724 (ctail >= chead && chead >= phead)725 || (chead >= phead && phead >= ctail)726 || (phead >= ctail && ctail >= chead)727 );728 729 // find the range we need to clear730 561 __u32 count = chead - phead; 731 732 // We acquired an previous-head/current-head range733 // go through the range and release the sqes734 562 for( i; count ) { 735 563 __u32 idx = ring.submit_q.array[ (phead + i) & smask ]; 736 737 /* paranoid */ verify( 0 != ring.submit_q.sqes[ idx ].user_data ); 738 __clean( &ring.submit_q.sqes[ idx ] ); 564 ring.submit_q.sqes[ idx ].user_data = 0; 739 565 } 740 566 return count; 741 567 } 742 743 void __sqe_clean( volatile struct io_uring_sqe * sqe ) {744 __clean( sqe );745 }746 747 static inline void __clean( volatile struct io_uring_sqe * sqe ) {748 // If we are in debug mode, thrash the fields to make sure we catch reclamation errors749 __cfaabi_dbg_debug_do(750 memset(sqe, 0xde, sizeof(*sqe));751 sqe->opcode = (sizeof(opcodes) / sizeof(const char *)) - 1;752 );753 754 // Mark the entry as unused755 __atomic_store_n(&sqe->user_data, 3ul64, __ATOMIC_SEQ_CST);756 }757 568 #endif -
libcfa/src/concurrency/io/call.cfa.in
r342af53 r4468a70 74 74 ; 75 75 76 extern [* volatilestruct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );76 extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ); 77 77 extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))); 78 78 … … 222 222 __u32 idx; 223 223 struct io_uring_sqe * sqe; 224 [(volatile struct io_uring_sqe *) sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future ); 225 224 [sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future ); 225 226 sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; 226 227 sqe->opcode = IORING_OP_{op}; 227 sqe->flags = sflags; 228 sqe->ioprio = 0; 229 sqe->fd = 0; 230 sqe->off = 0; 231 sqe->addr = 0; 232 sqe->len = 0; 233 sqe->fsync_flags = 0; 234 sqe->__pad2[0] = 0; 235 sqe->__pad2[1] = 0; 236 sqe->__pad2[2] = 0;{body} 237 238 asm volatile("": : :"memory"); 228 sqe->flags = sflags;{body} 239 229 240 230 verify( sqe->user_data == (__u64)(uintptr_t)&future ); … … 322 312 }), 323 313 # CFA_HAVE_IORING_OP_ACCEPT 324 Call('ACCEPT ', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', {325 'fd': 'sockfd', 326 'addr': ' (__u64)addr',327 'addr2': ' (__u64)addrlen',314 Call('ACCEPT4', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', { 315 'fd': 'sockfd', 316 'addr': 'addr', 317 'addr2': 'addrlen', 328 318 'accept_flags': 'flags' 329 319 }), … … 474 464 475 465 print(""" 476 //-----------------------------------------------------------------------------477 bool cancel(io_cancellation & this) {478 #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ASYNC_CANCEL)479 return false;480 #else481 io_future_t future;482 483 io_context * context = __get_io_context();484 485 __u8 sflags = 0;486 struct __io_data & ring = *context->thrd.ring;487 488 __u32 idx;489 volatile struct io_uring_sqe * sqe;490 [sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future );491 492 sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;493 sqe->opcode = IORING_OP_ASYNC_CANCEL;494 sqe->flags = sflags;495 sqe->addr = this.target;496 497 verify( sqe->user_data == (__u64)(uintptr_t)&future );498 __submit( context, idx );499 500 wait(future);501 502 if( future.result == 0 ) return true; // Entry found503 if( future.result == -EALREADY) return true; // Entry found but in progress504 if( future.result == -ENOENT ) return false; // Entry not found505 return false;506 #endif507 }508 509 466 //----------------------------------------------------------------------------- 510 467 // Check if a function is has asynchronous -
libcfa/src/concurrency/io/setup.cfa
r342af53 r4468a70 52 52 #include <pthread.h> 53 53 #include <sys/epoll.h> 54 #include <sys/eventfd.h>55 54 #include <sys/mman.h> 56 55 #include <sys/syscall.h> … … 170 169 // Main loop 171 170 while( iopoll.run ) { 172 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : waiting on io_uring contexts\n");173 174 171 // Wait for events 175 172 int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask ); 176 177 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : %d io contexts events, waking up\n", nfds);178 173 179 174 // Check if an error occured … … 186 181 $io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64; 187 182 /* paranoid */ verify( io_ctx ); 188 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %d (%p)\n", io_ctx->ring->fd, io_ctx);183 __cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx); 189 184 #if !defined( __CFA_NO_STATISTICS__ ) 190 185 __cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats; 191 186 #endif 192 193 eventfd_t v;194 eventfd_read(io_ctx->ring->efd, &v);195 196 187 post( io_ctx->sem ); 197 188 } … … 242 233 $thread & thrd = this.thrd.self; 243 234 if( cluster_context ) { 244 // We are about to do weird things with the threads245 // we don't need interrupts to complicate everything246 disable_interrupts();247 248 // Get cluster info249 235 cluster & cltr = *thrd.curr_cluster; 250 236 /* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster ); … … 253 239 // We need to adjust the clean-up based on where the thread is 254 240 if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) { 255 // This is the tricky case256 // The thread was preempted or ready to run and now it is on the ready queue257 // but the cluster is shutting down, so there aren't any processors to run the ready queue258 // the solution is to steal the thread from the ready-queue and pretend it was blocked all along259 241 260 242 ready_schedule_lock(); 261 // The thread should on the list 243 244 // This is the tricky case 245 // The thread was preempted and now it is on the ready queue 246 // The thread should be the last on the list 262 247 /* paranoid */ verify( thrd.link.next != 0p ); 263 248 264 249 // Remove the thread from the ready queue of this cluster 265 // The thread should be the last on the list266 250 __attribute__((unused)) bool removed = remove_head( &cltr, &thrd ); 267 251 /* paranoid */ verify( removed ); … … 279 263 } 280 264 // !!! This is not an else if !!! 281 // Ok, now the thread is blocked (whether we cheated to get here or not)282 265 if( thrd.state == Blocked ) { 266 283 267 // This is the "easy case" 284 268 // The thread is parked and can easily be moved to active cluster … … 290 274 } 291 275 else { 276 292 277 // The thread is in a weird state 293 278 // I don't know what to do here 294 279 abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n"); 295 280 } 296 297 // The weird thread kidnapping stuff is over, restore interrupts.298 enable_interrupts( __cfaabi_dbg_ctx );299 281 } else { 300 282 post( this.thrd.sem ); … … 383 365 } 384 366 385 // Step 3 : Initialize the data structure386 367 // Get the pointers from the kernel to fill the structure 387 368 // submit queue … … 398 379 const __u32 num = *sq.num; 399 380 for( i; num ) { 400 __sqe_clean( &sq.sqes[i] );381 sq.sqes[i].user_data = 0ul64; 401 382 } 402 383 } … … 428 409 cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes); 429 410 430 // Step 4 : eventfd431 int efd;432 for() {433 efd = eventfd(0, 0);434 if (efd < 0) {435 if (errno == EINTR) continue;436 abort("KERNEL ERROR: IO_URING EVENTFD - %s\n", strerror(errno));437 }438 break;439 }440 441 int ret;442 for() {443 ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1);444 if (ret < 0) {445 if (errno == EINTR) continue;446 abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));447 }448 break;449 }450 451 411 // some paranoid checks 452 412 /* paranoid */ verifyf( (*cq.mask) == ((*cq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*cq.num) - 1ul32, *cq.num, *cq.mask ); … … 463 423 this.ring_flags = params.flags; 464 424 this.fd = fd; 465 this.efd = efd;466 425 this.eager_submits = params_in.eager_submits; 467 426 this.poller_submits = params_in.poller_submits; … … 486 445 // close the file descriptor 487 446 close(this.fd); 488 close(this.efd);489 447 490 448 free( this.submit_q.ready ); // Maybe null, doesn't matter … … 494 452 // I/O Context Sleep 495 453 //============================================================================================= 496 #define IOEVENTS EPOLLIN | EPOLLONESHOT 497 498 static inline void __ioctx_epoll_ctl($io_ctx_thread & ctx, int op, const char * error) { 499 struct epoll_event ev; 500 ev.events = IOEVENTS; 454 455 void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev) { 456 ev.events = EPOLLIN | EPOLLONESHOT; 501 457 ev.data.u64 = (__u64)&ctx; 502 int ret = epoll_ctl(iopoll.epollfd, op, ctx.ring->efd, &ev);458 int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->fd, &ev); 503 459 if (ret < 0) { 504 abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) ); 505 } 506 } 507 508 void __ioctx_register($io_ctx_thread & ctx) { 509 __ioctx_epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD"); 510 } 511 512 void __ioctx_prepare_block($io_ctx_thread & ctx) { 513 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.ring->fd, &ctx); 514 __ioctx_epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM"); 460 abort( "KERNEL ERROR: EPOLL ADD - (%d) %s\n", (int)errno, strerror(errno) ); 461 } 462 } 463 464 void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev) { 465 int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_MOD, ctx.ring->fd, &ev); 466 if (ret < 0) { 467 abort( "KERNEL ERROR: EPOLL REARM - (%d) %s\n", (int)errno, strerror(errno) ); 468 } 515 469 } 516 470 -
libcfa/src/concurrency/io/types.hfa
r342af53 r4468a70 65 65 66 66 // A buffer of sqes (not the actual ring) 67 volatilestruct io_uring_sqe * sqes;67 struct io_uring_sqe * sqes; 68 68 69 69 // The location and size of the mmaped area … … 85 85 86 86 // the kernel ring 87 volatilestruct io_uring_cqe * cqes;87 struct io_uring_cqe * cqes; 88 88 89 89 // The location and size of the mmaped area … … 97 97 __u32 ring_flags; 98 98 int fd; 99 int efd;100 99 bool eager_submits:1; 101 100 bool poller_submits:1; … … 131 130 #endif 132 131 132 struct epoll_event; 133 133 struct $io_ctx_thread; 134 void __ioctx_register($io_ctx_thread & ctx); 135 void __ioctx_prepare_block($io_ctx_thread & ctx); 136 void __sqe_clean( volatile struct io_uring_sqe * sqe ); 134 void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev); 135 void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev); 137 136 #endif 138 137 -
libcfa/src/concurrency/monitor.hfa
r342af53 r4468a70 132 132 133 133 void wait ( condition & this, uintptr_t user_info = 0 ); 134 static inline bool is_empty ( condition & this ) { return this.blocked.head == 1p; }135 134 bool signal ( condition & this ); 136 135 bool signal_block( condition & this ); 137 static inline bool signal_all ( condition & this ) { bool ret = false; while(!is_empty(this)) { ret = signal(this) || ret; } return ret; }136 static inline bool is_empty ( condition & this ) { return this.blocked.head == 1p; } 138 137 uintptr_t front ( condition & this ); 139 138 -
libcfa/src/heap.cfa
r342af53 r4468a70 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sun Jan 10 11:20:49 202113 // Update Count : 10 3112 // Last Modified On : Wed Dec 16 12:28:25 2020 13 // Update Count : 1023 14 14 // 15 15 … … 262 262 #ifdef __STATISTICS__ 263 263 // Heap statistics counters. 264 static unsigned int malloc_ zero_calls, malloc_calls;264 static unsigned int malloc_calls; 265 265 static unsigned long long int malloc_storage; 266 static unsigned int aalloc_ zero_calls, aalloc_calls;266 static unsigned int aalloc_calls; 267 267 static unsigned long long int aalloc_storage; 268 static unsigned int calloc_ zero_calls, calloc_calls;268 static unsigned int calloc_calls; 269 269 static unsigned long long int calloc_storage; 270 static unsigned int memalign_ zero_calls, memalign_calls;270 static unsigned int memalign_calls; 271 271 static unsigned long long int memalign_storage; 272 static unsigned int amemalign_ zero_calls, amemalign_calls;272 static unsigned int amemalign_calls; 273 273 static unsigned long long int amemalign_storage; 274 static unsigned int cmemalign_ zero_calls, cmemalign_calls;274 static unsigned int cmemalign_calls; 275 275 static unsigned long long int cmemalign_storage; 276 static unsigned int resize_ zero_calls, resize_calls;276 static unsigned int resize_calls; 277 277 static unsigned long long int resize_storage; 278 static unsigned int realloc_ zero_calls, realloc_calls;278 static unsigned int realloc_calls; 279 279 static unsigned long long int realloc_storage; 280 static unsigned int free_ zero_calls, free_calls;280 static unsigned int free_calls; 281 281 static unsigned long long int free_storage; 282 282 static unsigned int mmap_calls; … … 287 287 static unsigned long long int sbrk_storage; 288 288 // Statistics file descriptor (changed by malloc_stats_fd). 289 static int stat s_fd = STDERR_FILENO;// default stderr289 static int stat_fd = STDERR_FILENO; // default stderr 290 290 291 291 // Use "write" because streams may be shutdown when calls are made. … … 293 293 char helpText[1024]; 294 294 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 295 "\nHeap statistics:\n"296 " malloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"297 " aalloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"298 " calloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"299 " memalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"300 " amemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"301 " cmemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"302 " resize 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"303 " realloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"304 " free 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"305 " mmap calls %'u; storage %'llu bytes\n"306 " munmap calls %'u; storage %'llu bytes\n"307 " sbrk calls %'u; storage %'llu bytes\n",308 malloc_zero_calls,malloc_calls, malloc_storage,309 aalloc_zero_calls,aalloc_calls, aalloc_storage,310 calloc_zero_calls,calloc_calls, calloc_storage,311 memalign_zero_calls,memalign_calls, memalign_storage,312 amemalign_zero_calls,amemalign_calls, amemalign_storage,313 cmemalign_zero_calls,cmemalign_calls, cmemalign_storage,314 resize_zero_calls,resize_calls, resize_storage,315 realloc_zero_calls,realloc_calls, realloc_storage,316 free_zero_calls,free_calls, free_storage,317 mmap_calls, mmap_storage,318 munmap_calls, munmap_storage,319 sbrk_calls, sbrk_storage295 "\nHeap statistics:\n" 296 " malloc: calls %u / storage %llu\n" 297 " aalloc: calls %u / storage %llu\n" 298 " calloc: calls %u / storage %llu\n" 299 " memalign: calls %u / storage %llu\n" 300 " amemalign: calls %u / storage %llu\n" 301 " cmemalign: calls %u / storage %llu\n" 302 " resize: calls %u / storage %llu\n" 303 " realloc: calls %u / storage %llu\n" 304 " free: calls %u / storage %llu\n" 305 " mmap: calls %u / storage %llu\n" 306 " munmap: calls %u / storage %llu\n" 307 " sbrk: calls %u / storage %llu\n", 308 malloc_calls, malloc_storage, 309 aalloc_calls, aalloc_storage, 310 calloc_calls, calloc_storage, 311 memalign_calls, memalign_storage, 312 amemalign_calls, amemalign_storage, 313 cmemalign_calls, cmemalign_storage, 314 resize_calls, resize_storage, 315 realloc_calls, realloc_storage, 316 free_calls, free_storage, 317 mmap_calls, mmap_storage, 318 munmap_calls, munmap_storage, 319 sbrk_calls, sbrk_storage 320 320 ); 321 321 } // printStats … … 328 328 "<sizes>\n" 329 329 "</sizes>\n" 330 "<total type=\"malloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"331 "<total type=\"aalloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"332 "<total type=\"calloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"333 "<total type=\"memalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"334 "<total type=\"amemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"335 "<total type=\"cmemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"336 "<total type=\"resize\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"337 "<total type=\"realloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"338 "<total type=\"free\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"339 "<total type=\"mmap\" count=\"% 'u;\" size=\"%'llu\"/> bytes\n"340 "<total type=\"munmap\" count=\"% 'u;\" size=\"%'llu\"/> bytes\n"341 "<total type=\"sbrk\" count=\"% 'u;\" size=\"%'llu\"/> bytes\n"330 "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n" 331 "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n" 332 "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n" 333 "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n" 334 "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n" 335 "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n" 336 "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n" 337 "<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n" 338 "<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n" 339 "<total type=\"mmap\" count=\"%u\" size=\"%llu\"/>\n" 340 "<total type=\"munmap\" count=\"%u\" size=\"%llu\"/>\n" 341 "<total type=\"sbrk\" count=\"%u\" size=\"%llu\"/>\n" 342 342 "</malloc>", 343 malloc_ zero_calls, malloc_calls, malloc_storage,344 aalloc_ zero_calls, aalloc_calls, aalloc_storage,345 calloc_ zero_calls, calloc_calls, calloc_storage,346 memalign_ zero_calls, memalign_calls, memalign_storage,347 amemalign_ zero_calls, amemalign_calls, amemalign_storage,348 cmemalign_ zero_calls, cmemalign_calls, cmemalign_storage,349 resize_ zero_calls, resize_calls, resize_storage,350 realloc_ zero_calls, realloc_calls, realloc_storage,351 free_ zero_calls, free_calls, free_storage,343 malloc_calls, malloc_storage, 344 aalloc_calls, aalloc_storage, 345 calloc_calls, calloc_storage, 346 memalign_calls, memalign_storage, 347 amemalign_calls, amemalign_storage, 348 cmemalign_calls, cmemalign_storage, 349 resize_calls, resize_storage, 350 realloc_calls, realloc_storage, 351 free_calls, free_storage, 352 352 mmap_calls, mmap_storage, 353 353 munmap_calls, munmap_storage, … … 466 466 } // headers 467 467 468 //#ifdef __CFA_DEBUG__469 //#if __SIZEOF_POINTER__ == 4470 //#define MASK 0xdeadbeef471 //#else472 //#define MASK 0xdeadbeefdeadbeef473 //#endif474 //#define STRIDE size_t475 476 //static void * Memset( void * addr, STRIDE size ) { // debug only477 //if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) );478 //if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) );479 480 //STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE);481 //for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK;482 //return addr;483 //} // Memset484 //#endif // __CFA_DEBUG__468 #ifdef __CFA_DEBUG__ 469 #if __SIZEOF_POINTER__ == 4 470 #define MASK 0xdeadbeef 471 #else 472 #define MASK 0xdeadbeefdeadbeef 473 #endif 474 #define STRIDE size_t 475 476 static void * Memset( void * addr, STRIDE size ) { // debug only 477 if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) ); 478 if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) ); 479 480 STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE); 481 for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK; 482 return addr; 483 } // Memset 484 #endif // __CFA_DEBUG__ 485 485 486 486 … … 498 498 unlock( extlock ); 499 499 __cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size ); 500 _exit( EXIT_FAILURE ); // give up 501 } // if 502 // Make storage executable for thunks. 500 _exit( EXIT_FAILURE ); 501 } // if 503 502 if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) { 504 503 unlock( extlock ); … … 771 770 772 771 772 static inline void * callocNoStats( size_t dim, size_t elemSize ) { 773 size_t size = dim * elemSize; 774 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER 775 char * addr = (char *)mallocNoStats( size ); 776 777 HeapManager.Storage.Header * header; 778 HeapManager.FreeHeader * freeElem; 779 size_t bsize, alignment; 780 #ifndef __CFA_DEBUG__ 781 bool mapped = 782 #endif // __CFA_DEBUG__ 783 headers( "calloc", addr, header, freeElem, bsize, alignment ); 784 #ifndef __CFA_DEBUG__ 785 786 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 787 if ( ! mapped ) 788 #endif // __CFA_DEBUG__ 789 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined 790 // `-header`-addr `-size 791 memset( addr, '\0', size ); // set to zeros 792 793 header->kind.real.blockSize |= 2; // mark as zero filled 794 return addr; 795 } // callocNoStats 796 797 773 798 static inline void * memalignNoStats( size_t alignment, size_t size ) { 774 799 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER … … 809 834 810 835 836 static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) { 837 size_t size = dim * elemSize; 838 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER 839 char * addr = (char *)memalignNoStats( alignment, size ); 840 841 HeapManager.Storage.Header * header; 842 HeapManager.FreeHeader * freeElem; 843 size_t bsize; 844 #ifndef __CFA_DEBUG__ 845 bool mapped = 846 #endif // __CFA_DEBUG__ 847 headers( "cmemalign", addr, header, freeElem, bsize, alignment ); 848 849 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 850 #ifndef __CFA_DEBUG__ 851 if ( ! mapped ) 852 #endif // __CFA_DEBUG__ 853 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined 854 // `-header`-addr `-size 855 memset( addr, '\0', size ); // set to zeros 856 857 header->kind.real.blockSize |= 2; // mark as zero filled 858 return addr; 859 } // cmemalignNoStats 860 861 811 862 extern "C" { 812 863 // Allocates size bytes and returns a pointer to the allocated memory. The contents are undefined. If size is 0, … … 814 865 void * malloc( size_t size ) { 815 866 #ifdef __STATISTICS__ 816 if ( likely( size > 0 ) ) { 817 __atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST ); 818 __atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST ); 819 } else { 820 __atomic_add_fetch( &malloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 821 } // if 867 __atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST ); 868 __atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST ); 822 869 #endif // __STATISTICS__ 823 870 … … 830 877 size_t size = dim * elemSize; 831 878 #ifdef __STATISTICS__ 832 if ( likely( size > 0 ) ) { 833 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 834 __atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST ); 835 } else { 836 __atomic_add_fetch( &aalloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 837 } // if 879 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 880 __atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST ); 838 881 #endif // __STATISTICS__ 839 882 … … 844 887 // Same as aalloc() with memory set to zero. 845 888 void * calloc( size_t dim, size_t elemSize ) { 846 size_t size = dim * elemSize;847 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER848 #ifdef __STATISTICS__849 __atomic_add_fetch( &calloc_zero_calls, 1, __ATOMIC_SEQ_CST );850 #endif // __STATISTICS__851 return 0p;852 } // if853 889 #ifdef __STATISTICS__ 854 890 __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST ); … … 856 892 #endif // __STATISTICS__ 857 893 858 char * addr = (char *)mallocNoStats( size ); 859 860 HeapManager.Storage.Header * header; 861 HeapManager.FreeHeader * freeElem; 862 size_t bsize, alignment; 863 864 #ifndef __CFA_DEBUG__ 865 bool mapped = 866 #endif // __CFA_DEBUG__ 867 headers( "calloc", addr, header, freeElem, bsize, alignment ); 868 869 #ifndef __CFA_DEBUG__ 870 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 871 if ( ! mapped ) 872 #endif // __CFA_DEBUG__ 873 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined 874 // `-header`-addr `-size 875 memset( addr, '\0', size ); // set to zeros 876 877 header->kind.real.blockSize |= 2; // mark as zero filled 878 return addr; 894 return callocNoStats( dim, elemSize ); 879 895 } // calloc 880 896 … … 885 901 // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done. 886 902 void * resize( void * oaddr, size_t size ) { 903 #ifdef __STATISTICS__ 904 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 905 #endif // __STATISTICS__ 906 887 907 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 888 if ( unlikely( size == 0 ) ) { // special cases 889 #ifdef __STATISTICS__ 890 __atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST ); 891 #endif // __STATISTICS__ 892 free( oaddr ); 893 return 0p; 894 } // if 895 #ifdef __STATISTICS__ 896 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 897 #endif // __STATISTICS__ 898 908 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases 899 909 if ( unlikely( oaddr == 0p ) ) { 900 910 #ifdef __STATISTICS__ … … 908 918 size_t bsize, oalign; 909 919 headers( "resize", oaddr, header, freeElem, bsize, oalign ); 910 911 920 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 921 912 922 // same size, DO NOT preserve STICKY PROPERTIES. 913 923 if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size … … 930 940 // the old and new sizes. 931 941 void * realloc( void * oaddr, size_t size ) { 942 #ifdef __STATISTICS__ 943 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 944 #endif // __STATISTICS__ 945 932 946 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 933 if ( unlikely( size == 0 ) ) { // special cases 934 #ifdef __STATISTICS__ 935 __atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 936 #endif // __STATISTICS__ 937 free( oaddr ); 938 return 0p; 939 } // if 940 #ifdef __STATISTICS__ 941 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 942 #endif // __STATISTICS__ 943 947 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases 944 948 if ( unlikely( oaddr == 0p ) ) { 945 949 #ifdef __STATISTICS__ … … 995 999 void * memalign( size_t alignment, size_t size ) { 996 1000 #ifdef __STATISTICS__ 997 if ( likely( size > 0 ) ) { 998 __atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST ); 999 __atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST ); 1000 } else { 1001 __atomic_add_fetch( &memalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1002 } // if 1001 __atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST ); 1002 __atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST ); 1003 1003 #endif // __STATISTICS__ 1004 1004 … … 1011 1011 size_t size = dim * elemSize; 1012 1012 #ifdef __STATISTICS__ 1013 if ( likely( size > 0 ) ) { 1014 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1015 __atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST ); 1016 } else { 1017 __atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1018 } // if 1013 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1014 __atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST ); 1019 1015 #endif // __STATISTICS__ 1020 1016 … … 1025 1021 // Same as calloc() with memory alignment. 1026 1022 void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) { 1027 size_t size = dim * elemSize;1028 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER1029 #ifdef __STATISTICS__1030 __atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST );1031 #endif // __STATISTICS__1032 return 0p;1033 } // if1034 1023 #ifdef __STATISTICS__ 1035 1024 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); … … 1037 1026 #endif // __STATISTICS__ 1038 1027 1039 char * addr = (char *)memalignNoStats( alignment, size ); 1040 1041 HeapManager.Storage.Header * header; 1042 HeapManager.FreeHeader * freeElem; 1043 size_t bsize; 1044 1045 #ifndef __CFA_DEBUG__ 1046 bool mapped = 1047 #endif // __CFA_DEBUG__ 1048 headers( "cmemalign", addr, header, freeElem, bsize, alignment ); 1049 1050 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 1051 #ifndef __CFA_DEBUG__ 1052 if ( ! mapped ) 1053 #endif // __CFA_DEBUG__ 1054 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined 1055 // `-header`-addr `-size 1056 memset( addr, '\0', size ); // set to zeros 1057 1058 header->kind.real.blockSize |= 2; // mark as zero filled 1059 return addr; 1028 return cmemalignNoStats( alignment, dim, elemSize ); 1060 1029 } // cmemalign 1061 1030 … … 1096 1065 // 0p, no operation is performed. 1097 1066 void free( void * addr ) { 1067 #ifdef __STATISTICS__ 1068 __atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST ); 1069 #endif // __STATISTICS__ 1070 1098 1071 if ( unlikely( addr == 0p ) ) { // special case 1099 #ifdef __STATISTICS__1100 __atomic_add_fetch( &free_zero_calls, 1, __ATOMIC_SEQ_CST );1101 #endif // __STATISTICS__1102 1103 1072 // #ifdef __CFA_DEBUG__ 1104 1073 // if ( traceHeap() ) { … … 1213 1182 int malloc_stats_fd( int fd __attribute__(( unused )) ) { 1214 1183 #ifdef __STATISTICS__ 1215 int temp = stat s_fd;1216 stat s_fd = fd;1184 int temp = stat_fd; 1185 stat_fd = fd; 1217 1186 return temp; 1218 1187 #else … … 1245 1214 // The string is printed on the file stream stream. The exported string includes information about all arenas (see 1246 1215 // malloc). 1247 int malloc_info( int options, FILE * stream __attribute__(( unused ))) {1216 int malloc_info( int options, FILE * stream ) { 1248 1217 if ( options != 0 ) { errno = EINVAL; return -1; } 1249 1218 #ifdef __STATISTICS__ … … 1274 1243 // Must have CFA linkage to overload with C linkage realloc. 1275 1244 void * resize( void * oaddr, size_t nalign, size_t size ) { 1276 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1277 if ( unlikely( size == 0 ) ) { // special cases 1278 #ifdef __STATISTICS__ 1279 __atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST ); 1280 #endif // __STATISTICS__ 1281 free( oaddr ); 1282 return 0p; 1283 } // if 1245 #ifdef __STATISTICS__ 1246 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 1247 #endif // __STATISTICS__ 1284 1248 1285 1249 if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum 1286 1250 #ifdef __CFA_DEBUG__ 1287 else checkAlign( nalign ); // check alignment 1251 else 1252 checkAlign( nalign ); // check alignment 1288 1253 #endif // __CFA_DEBUG__ 1289 1254 1255 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1256 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases 1290 1257 if ( unlikely( oaddr == 0p ) ) { 1291 1258 #ifdef __STATISTICS__ 1292 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );1293 1259 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST ); 1294 1260 #endif // __STATISTICS__ … … 1336 1302 1337 1303 void * realloc( void * oaddr, size_t nalign, size_t size ) { 1338 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.1339 if ( unlikely( size == 0 ) ) { // special cases1340 #ifdef __STATISTICS__1341 __atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST );1342 #endif // __STATISTICS__1343 free( oaddr );1344 return 0p;1345 } // if1346 1347 1304 if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum 1348 1305 #ifdef __CFA_DEBUG__ 1349 else checkAlign( nalign ); // check alignment 1306 else 1307 checkAlign( nalign ); // check alignment 1350 1308 #endif // __CFA_DEBUG__ 1351 1309 1310 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1311 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases 1352 1312 if ( unlikely( oaddr == 0p ) ) { 1353 1313 #ifdef __STATISTICS__ -
libcfa/src/startup.cfa
r342af53 r4468a70 10 10 // Created On : Tue Jul 24 16:21:57 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sat Jan 9 23:18:23 202113 // Update Count : 3 412 // Last Modified On : Tue Feb 4 13:03:18 2020 13 // Update Count : 30 14 14 // 15 15 16 #include <time.h> // tzset 17 #include <locale.h> // setlocale 18 #include <stdlib.h> // getenv 16 #include <time.h> // tzset 17 #include <locale.h> // setlocale 19 18 #include "startup.hfa" 20 19 … … 23 22 void __cfaabi_appready_startup( void ) { 24 23 tzset(); // initialize time global variables 25 setlocale( LC_NUMERIC, getenv("LANG"));24 setlocale(LC_NUMERIC, ""); 26 25 #ifdef __CFA_DEBUG__ 27 26 extern void heapAppStart(); -
src/AST/Decl.cpp
r342af53 r4468a70 10 10 // Created On : Thu May 9 10:00:00 2019 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Jan 12 16:54:55 202113 // Update Count : 2 312 // Last Modified On : Fri Dec 13 16:23:15 2019 13 // Update Count : 20 14 14 // 15 15 … … 78 78 79 79 const char * TypeDecl::typeString() const { 80 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array lengthtype" };81 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );80 static const char * kindNames[] = { "sized data type", "sized object type", "sized function type", "sized tuple type" }; 81 static_assert( sizeof(kindNames)/sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." ); 82 82 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 83 83 return sized ? kindNames[ kind ] : &kindNames[ kind ][ sizeof("sized") ]; // sizeof includes '\0' … … 85 85 86 86 const char * TypeDecl::genTypeString() const { 87 static const char * kindNames[] = { " T &", "T *", "T", "(*)", "T ...", "[T]" };88 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." );87 static const char * kindNames[] = { "dtype", "otype", "ftype", "ttype" }; 88 static_assert( sizeof(kindNames)/sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." ); 89 89 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 90 90 return kindNames[ kind ]; -
src/AST/Decl.hpp
r342af53 r4468a70 10 10 // Created On : Thu May 9 10:00:00 2019 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 20:48:38 202113 // Update Count : 3012 // Last Modified On : Fri Dec 13 17:38:33 2019 13 // Update Count : 29 14 14 // 15 15 … … 175 175 class TypeDecl final : public NamedTypeDecl { 176 176 public: 177 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };177 enum Kind { Dtype, Otype, Ftype, Ttype, NUMBER_OF_KINDS }; 178 178 179 179 Kind kind; -
src/Parser/DeclarationNode.cc
r342af53 r4468a70 10 10 // Created On : Sat May 16 12:34:05 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 20:58:07 202113 // Update Count : 113 712 // Last Modified On : Thu Oct 8 08:03:38 2020 13 // Update Count : 1135 14 14 // 15 15 … … 1075 1075 if ( variable.tyClass != TypeDecl::NUMBER_OF_KINDS ) { 1076 1076 // otype is internally converted to dtype + otype parameters 1077 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::D Stype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::ALtype };1078 static_assert( sizeof(kindMap) /sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." );1077 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype }; 1078 static_assert( sizeof(kindMap)/sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." ); 1079 1079 assertf( variable.tyClass < sizeof(kindMap)/sizeof(kindMap[0]), "Variable's tyClass is out of bounds." ); 1080 1080 TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype, variable.initializer ? variable.initializer->buildType() : nullptr ); -
src/Parser/ParseNode.h
r342af53 r4468a70 10 10 // Created On : Sat May 16 13:28:16 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : S un Jan 3 18:23:01 202113 // Update Count : 89 612 // Last Modified On : Sat Oct 24 03:53:54 2020 13 // Update Count : 895 14 14 // 15 15 … … 39 39 struct DeclarationNode; 40 40 class DeclarationWithType; 41 class ExpressionNode; 41 42 class Initializer; 42 class ExpressionNode;43 43 struct StatementNode; 44 44 -
src/Parser/parser.yy
r342af53 r4468a70 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 21:32:10 202113 // Update Count : 46 3312 // Last Modified On : Sat Oct 24 08:21:14 2020 13 // Update Count : 4624 14 14 // 15 15 … … 329 329 %type<en> conditional_expression constant_expression assignment_expression assignment_expression_opt 330 330 %type<en> comma_expression comma_expression_opt 331 %type<en> argument_expression_list_opt argument_expression default_initialize r_opt331 %type<en> argument_expression_list_opt argument_expression default_initialize_opt 332 332 %type<ifctl> if_control_expression 333 333 %type<fctl> for_control_expression for_control_expression_list … … 424 424 %type<decl> sue_declaration_specifier sue_declaration_specifier_nobody sue_type_specifier sue_type_specifier_nobody 425 425 426 %type<tclass> type_class new_type_class426 %type<tclass> type_class 427 427 %type<decl> type_declarator type_declarator_name type_declaring_list 428 428 … … 1545 1545 | cfa_function_declaration 1546 1546 | type_declaring_list 1547 { SemanticError( yylloc, "otype declaration is currently unimplemented." ); $$ = nullptr; }1548 1547 | trait_specifier 1549 1548 ; … … 2224 2223 ; 2225 2224 2226 cfa_parameter_ellipsis_list_opt: // CFA, abstract + real2225 cfa_parameter_ellipsis_list_opt: // CFA, abstract + real 2227 2226 // empty 2228 2227 { $$ = DeclarationNode::newBasicType( DeclarationNode::Void ); } … … 2281 2280 cfa_parameter_declaration: // CFA, new & old style parameter declaration 2282 2281 parameter_declaration 2283 | cfa_identifier_parameter_declarator_no_tuple identifier_or_type_name default_initialize r_opt2282 | cfa_identifier_parameter_declarator_no_tuple identifier_or_type_name default_initialize_opt 2284 2283 { $$ = $1->addName( $2 ); } 2285 | cfa_abstract_tuple identifier_or_type_name default_initialize r_opt2284 | cfa_abstract_tuple identifier_or_type_name default_initialize_opt 2286 2285 // To obtain LR(1), these rules must be duplicated here (see cfa_abstract_declarator). 2287 2286 { $$ = $1->addName( $2 ); } 2288 | type_qualifier_list cfa_abstract_tuple identifier_or_type_name default_initialize r_opt2287 | type_qualifier_list cfa_abstract_tuple identifier_or_type_name default_initialize_opt 2289 2288 { $$ = $2->addName( $3 )->addQualifiers( $1 ); } 2290 2289 | cfa_function_specifier … … 2303 2302 parameter_declaration: 2304 2303 // No SUE declaration in parameter list. 2305 declaration_specifier_nobody identifier_parameter_declarator default_initialize r_opt2304 declaration_specifier_nobody identifier_parameter_declarator default_initialize_opt 2306 2305 { $$ = $2->addType( $1 )->addInitializer( $3 ? new InitializerNode( $3 ) : nullptr ); } 2307 | declaration_specifier_nobody type_parameter_redeclarator default_initialize r_opt2306 | declaration_specifier_nobody type_parameter_redeclarator default_initialize_opt 2308 2307 { $$ = $2->addType( $1 )->addInitializer( $3 ? new InitializerNode( $3 ) : nullptr ); } 2309 2308 ; 2310 2309 2311 2310 abstract_parameter_declaration: 2312 declaration_specifier_nobody default_initialize r_opt2311 declaration_specifier_nobody default_initialize_opt 2313 2312 { $$ = $1->addInitializer( $2 ? new InitializerNode( $2 ) : nullptr ); } 2314 | declaration_specifier_nobody abstract_parameter_declarator default_initialize r_opt2313 | declaration_specifier_nobody abstract_parameter_declarator default_initialize_opt 2315 2314 { $$ = $2->addType( $1 )->addInitializer( $3 ? new InitializerNode( $3 ) : nullptr ); } 2316 2315 ; … … 2442 2441 type_class identifier_or_type_name 2443 2442 { typedefTable.addToScope( *$2, TYPEDEFname, "9" ); } 2444 type_initializer_opt assertion_list_opt2443 type_initializer_opt assertion_list_opt 2445 2444 { $$ = DeclarationNode::newTypeParam( $1, $2 )->addTypeInitializer( $4 )->addAssertions( $5 ); } 2446 | identifier_or_type_name new_type_class 2447 { typedefTable.addToScope( *$1, TYPEDEFname, "9" ); } 2448 type_initializer_opt assertion_list_opt 2449 { $$ = DeclarationNode::newTypeParam( $2, $1 )->addTypeInitializer( $4 )->addAssertions( $5 ); } 2450 | '[' identifier_or_type_name ']' 2451 { 2452 typedefTable.addToScope( *$2, TYPEDEFname, "9" ); 2453 $$ = DeclarationNode::newTypeParam( TypeDecl::ALtype, $2 ); 2454 } 2455 // | type_specifier identifier_parameter_declarator 2445 | type_specifier identifier_parameter_declarator 2456 2446 | assertion_list 2457 2447 { $$ = DeclarationNode::newTypeParam( TypeDecl::Dtype, new string( DeclarationNode::anonymous.newName() ) )->addAssertions( $1 ); } 2458 ;2459 2460 new_type_class: // CFA2461 // empty2462 { $$ = TypeDecl::Otype; }2463 | '&'2464 { $$ = TypeDecl::Dtype; }2465 | '*'2466 { $$ = TypeDecl::DStype; } // dtype + sized2467 | ELLIPSIS2468 { $$ = TypeDecl::Ttype; }2469 2448 ; 2470 2449 … … 3497 3476 ; 3498 3477 3499 default_initialize r_opt:3478 default_initialize_opt: 3500 3479 // empty 3501 3480 { $$ = nullptr; } -
src/ResolvExpr/ResolveAssertions.cc
r342af53 r4468a70 397 397 398 398 /// Limit to depth of recursion of assertion satisfaction 399 static const int recursionLimit = 7;399 static const int recursionLimit = 4; 400 400 /// Maximum number of simultaneously-deferred assertions to attempt concurrent satisfaction of 401 401 static const int deferLimit = 10; -
src/ResolvExpr/SatisfyAssertions.cpp
r342af53 r4468a70 57 57 ast::UniqueId resnSlot; ///< Slot for any recursive assertion IDs 58 58 59 AssnCandidate( 60 const ast::SymbolTable::IdData c, const ast::Type * at, ast::TypeEnvironment && e, 59 AssnCandidate( 60 const ast::SymbolTable::IdData c, const ast::Type * at, ast::TypeEnvironment && e, 61 61 ast::AssertionSet && h, ast::AssertionSet && n, ast::OpenVarSet && o, ast::UniqueId rs ) 62 : cdata( c ), adjType( at ), env( std::move( e ) ), have( std::move( h ) ), 62 : cdata( c ), adjType( at ), env( std::move( e ) ), have( std::move( h ) ), 63 63 need( std::move( n ) ), open( std::move( o ) ), resnSlot( rs ) {} 64 64 }; … … 73 73 const AssnCandidate & match; 74 74 }; 75 76 /// Wrapper for the deferred items from a single assertion satisfaction. 75 76 /// Wrapper for the deferred items from a single assertion satisfaction. 77 77 /// Acts like an indexed list of DeferRef 78 78 struct DeferItem { … … 81 81 AssnCandidateList matches; 82 82 83 DeferItem( 83 DeferItem( 84 84 const ast::VariableExpr * d, const ast::AssertionSetValue & i, AssnCandidateList && ms ) 85 85 : expr( d ), info( i ), matches( std::move( ms ) ) {} … … 117 117 /// Initial satisfaction state for a candidate 118 118 SatState( CandidateRef & c, const ast::SymbolTable & syms ) 119 : cand( c ), need(), newNeed(), deferred(), inferred(), costs{ Cost::zero }, 119 : cand( c ), need(), newNeed(), deferred(), inferred(), costs{ Cost::zero }, 120 120 symtab( syms ) { need.swap( c->need ); } 121 121 122 122 /// Update satisfaction state for next step after previous state 123 123 SatState( SatState && o, IterateFlag ) 124 : cand( std::move( o.cand ) ), need( o.newNeed.begin(), o.newNeed.end() ), newNeed(), 125 deferred(), inferred( std::move( o.inferred ) ), costs( std::move( o.costs ) ), 124 : cand( std::move( o.cand ) ), need( o.newNeed.begin(), o.newNeed.end() ), newNeed(), 125 deferred(), inferred( std::move( o.inferred ) ), costs( std::move( o.costs ) ), 126 126 symtab( o.symtab ) { costs.emplace_back( Cost::zero ); } 127 127 128 128 /// Field-wise next step constructor 129 129 SatState( 130 CandidateRef && c, ast::AssertionSet && nn, InferCache && i, CostVec && cs, 130 CandidateRef && c, ast::AssertionSet && nn, InferCache && i, CostVec && cs, 131 131 ast::SymbolTable && syms ) 132 : cand( std::move( c ) ), need( nn.begin(), nn.end() ), newNeed(), deferred(), 132 : cand( std::move( c ) ), need( nn.begin(), nn.end() ), newNeed(), deferred(), 133 133 inferred( std::move( i ) ), costs( std::move( cs ) ), symtab( std::move( syms ) ) 134 134 { costs.emplace_back( Cost::zero ); } … … 143 143 144 144 /// Binds a single assertion, updating satisfaction state 145 void bindAssertion( 146 const ast::VariableExpr * expr, const ast::AssertionSetValue & info, CandidateRef & cand, 145 void bindAssertion( 146 const ast::VariableExpr * expr, const ast::AssertionSetValue & info, CandidateRef & cand, 147 147 AssnCandidate & match, InferCache & inferred 148 148 ) { 149 149 const ast::DeclWithType * candidate = match.cdata.id; 150 assertf( candidate->uniqueId, 150 assertf( candidate->uniqueId, 151 151 "Assertion candidate does not have a unique ID: %s", toString( candidate ).c_str() ); 152 152 153 153 ast::Expr * varExpr = match.cdata.combine( cand->expr->location, cand->cvtCost ); 154 154 varExpr->result = match.adjType; … … 201 201 ast::OpenVarSet newOpen{ sat.cand->open }; 202 202 ast::ptr< ast::Type > toType = assn.first->result; 203 ast::ptr< ast::Type > adjType = 203 ast::ptr< ast::Type > adjType = 204 204 renameTyVars( adjustExprType( candidate->get_type(), newEnv, sat.symtab ), GEN_USAGE, false ); 205 205 … … 213 213 } 214 214 215 matches.emplace_back( 215 matches.emplace_back( 216 216 cdata, adjType, std::move( newEnv ), std::move( have ), std::move( newNeed ), 217 217 std::move( newOpen ), crntResnSlot ); … … 287 287 }; 288 288 289 /// Replace ResnSlots with InferParams and add alternative to output list, if it meets pruning 289 /// Replace ResnSlots with InferParams and add alternative to output list, if it meets pruning 290 290 /// threshold. 291 void finalizeAssertions( 292 CandidateRef & cand, InferCache & inferred, PruneMap & thresholds, CostVec && costs, 293 CandidateList & out 291 void finalizeAssertions( 292 CandidateRef & cand, InferCache & inferred, PruneMap & thresholds, CostVec && costs, 293 CandidateList & out 294 294 ) { 295 295 // prune if cheaper alternative for same key has already been generated … … 308 308 } 309 309 310 /// Combo iterator that combines candidates into an output list, merging their environments. 311 /// Rejects an appended candidate if environments cannot be merged. See `Common/FilterCombos.h` 310 /// Combo iterator that combines candidates into an output list, merging their environments. 311 /// Rejects an appended candidate if environments cannot be merged. See `Common/FilterCombos.h` 312 312 /// for description of "combo iterator". 313 313 class CandidateEnvMerger { … … 390 390 } // anonymous namespace 391 391 392 void satisfyAssertions( 393 CandidateRef & cand, const ast::SymbolTable & symtab, CandidateList & out, 392 void satisfyAssertions( 393 CandidateRef & cand, const ast::SymbolTable & symtab, CandidateList & out, 394 394 std::vector<std::string> & errors 395 395 ) { … … 418 418 419 419 // should a limit be imposed? worst case here is O(n^2) but very unlikely to happen. 420 for (unsigned resetCount = 0; ; ++resetCount) { 420 for (unsigned resetCount = 0; ; ++resetCount) { 421 421 ast::AssertionList next; 422 422 resetTyVarRenaming(); … … 449 449 // either add successful match or push back next state 450 450 if ( sat.newNeed.empty() ) { 451 finalizeAssertions( 451 finalizeAssertions( 452 452 sat.cand, sat.inferred, thresholds, std::move( sat.costs ), out ); 453 453 } else { … … 471 471 std::vector< CandidateEnvMerger::OutType > compatible = filterCombos( 472 472 sat.deferred, CandidateEnvMerger{ sat.cand->env, sat.cand->open, sat.symtab } ); 473 473 474 474 // fail early if no mutually-compatible assertion satisfaction 475 475 if ( compatible.empty() ) { … … 494 494 // set up next satisfaction state 495 495 CandidateRef nextCand = std::make_shared<Candidate>( 496 sat.cand->expr, std::move( compat.env ), std::move( compat.open ), 496 sat.cand->expr, std::move( compat.env ), std::move( compat.open ), 497 497 ast::AssertionSet{} /* need moved into satisfaction state */, 498 498 sat.cand->cost, sat.cand->cvtCost ); … … 500 500 ast::AssertionSet nextNewNeed{ sat.newNeed }; 501 501 InferCache nextInferred{ sat.inferred }; 502 502 503 503 CostVec nextCosts{ sat.costs }; 504 504 nextCosts.back() += compat.cost; 505 505 506 506 ast::SymbolTable nextSymtab{ sat.symtab }; 507 507 … … 517 517 // either add successful match or push back next state 518 518 if ( nextNewNeed.empty() ) { 519 finalizeAssertions( 519 finalizeAssertions( 520 520 nextCand, nextInferred, thresholds, std::move( nextCosts ), out ); 521 521 } else { 522 nextSats.emplace_back( 523 std::move( nextCand ), std::move( nextNewNeed ), 524 std::move( nextInferred ), std::move( nextCosts ), 522 nextSats.emplace_back( 523 std::move( nextCand ), std::move( nextNewNeed ), 524 std::move( nextInferred ), std::move( nextCosts ), 525 525 std::move( nextSymtab ) ); 526 526 } … … 534 534 nextSats.clear(); 535 535 } 536 536 537 537 // exceeded recursion limit if reaches here 538 538 if ( out.empty() ) { -
src/SymTab/Demangle.cc
r342af53 r4468a70 10 10 // Created On : Thu Jul 19 12:52:41 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 21:28:27 202113 // Update Count : 1 112 // Last Modified On : Tue Feb 11 15:09:18 2020 13 // Update Count : 10 14 14 // 15 15 … … 367 367 // type variable types 368 368 for (size_t k = 0; k < TypeDecl::NUMBER_OF_KINDS; ++k) { 369 static const std::string typeVariableNames[] = { "DT", " DST", "OT", "FT", "TT", "ALT", };369 static const std::string typeVariableNames[] = { "DT", "OT", "FT", "TT", }; 370 370 static_assert( 371 371 sizeof(typeVariableNames)/sizeof(typeVariableNames[0]) == TypeDecl::NUMBER_OF_KINDS, -
src/SymTab/Mangler.cc
r342af53 r4468a70 10 10 // Created On : Sun May 17 21:40:29 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 21:56:06 202113 // Update Count : 7412 // Last Modified On : Wed Nov 18 12:01:38 2020 13 // Update Count : 64 14 14 // 15 15 #include "Mangler.h" … … 313 313 // and the case has not yet come up in practice. Alternatively, if not then this code can be removed 314 314 // aside from the assert false. 315 assertf( false, "Mangler_old should not visit typedecl: %s", toCString(decl));315 assertf(false, "Mangler_old should not visit typedecl: %s", toCString(decl)); 316 316 assertf( decl->kind < TypeDecl::NUMBER_OF_KINDS, "Unhandled type variable kind: %d", decl->kind ); 317 317 mangleName += Encoding::typeVariables[ decl->kind ] + std::to_string( decl->name.length() ) + decl->name; … … 343 343 break; 344 344 default: 345 assert f( false, "unimplemented kind for type variable %s", SymTab::Mangler::Encoding::typeVariables[i->kind].c_str());345 assert( false ); 346 346 } // switch 347 347 varNums[ i->name ] = std::make_pair( nextVarNum, (int)i->kind ); … … 673 673 for ( auto & decl : ptype->forall ) { 674 674 switch ( decl->kind ) { 675 case ast::TypeDecl::Kind::Dtype:675 case ast::TypeDecl::Kind::Dtype: 676 676 dcount++; 677 677 break; 678 case ast::TypeDecl::Kind::Ftype:678 case ast::TypeDecl::Kind::Ftype: 679 679 fcount++; 680 680 break; 681 case ast::TypeDecl::Kind::Ttype:681 case ast::TypeDecl::Kind::Ttype: 682 682 vcount++; 683 683 break; 684 default:685 assert f( false, "unimplemented kind for type variable %s", SymTab::Mangler::Encoding::typeVariables[decl->kind].c_str());684 default: 685 assert( false ); 686 686 } // switch 687 687 varNums[ decl->name ] = std::make_pair( nextVarNum, (int)decl->kind ); -
src/SymTab/ManglerCommon.cc
r342af53 r4468a70 10 10 // Created On : Sun May 17 21:44:03 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 21:23:10 202113 // Update Count : 2 912 // Last Modified On : Fri Dec 13 14:54:38 2019 13 // Update Count : 28 14 14 // 15 15 … … 104 104 const std::string typeVariables[] = { 105 105 "BD", // dtype 106 "BDS", // dtype + sized107 106 "BO", // otype 108 107 "BF", // ftype 109 108 "BT", // ttype 110 "BAL", // array length type111 109 }; 112 110 static_assert( 113 sizeof(typeVariables) /sizeof(typeVariables[0]) == TypeDecl::NUMBER_OF_KINDS,111 sizeof(typeVariables)/sizeof(typeVariables[0]) == TypeDecl::NUMBER_OF_KINDS, 114 112 "Each type variable kind should have a corresponding mangler prefix" 115 113 ); -
src/SynTree/Declaration.h
r342af53 r4468a70 10 10 // Created On : Mon May 18 07:44:20 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Jan 11 20:48:39 202113 // Update Count : 15 812 // Last Modified On : Fri Dec 13 23:11:22 2019 13 // Update Count : 157 14 14 // 15 15 … … 201 201 typedef NamedTypeDecl Parent; 202 202 public: 203 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };203 enum Kind { Dtype, Otype, Ftype, Ttype, NUMBER_OF_KINDS }; 204 204 205 205 Kind kind; -
src/SynTree/TypeDecl.cc
r342af53 r4468a70 10 10 // Created On : Mon May 18 07:44:20 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : T ue Jan 12 16:07:33 202113 // Update Count : 2 612 // Last Modified On : Thu Oct 8 18:18:55 2020 13 // Update Count : 22 14 14 // 15 15 … … 33 33 34 34 const char * TypeDecl::typeString() const { 35 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array lengthtype" };36 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );35 static const char * kindNames[] = { "sized data type", "sized object type", "sized function type", "sized tuple type" }; 36 static_assert( sizeof(kindNames)/sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." ); 37 37 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 38 38 return isComplete() ? kindNames[ kind ] : &kindNames[ kind ][ sizeof("sized") ]; // sizeof includes '\0' … … 40 40 41 41 const char * TypeDecl::genTypeString() const { 42 static const char * kindNames[] = { " T &", "T *", "T", "(*)", "T ...", "[T]" };43 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." );42 static const char * kindNames[] = { "dtype", "otype", "ftype", "ttype" }; 43 static_assert( sizeof(kindNames)/sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." ); 44 44 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 45 45 return kindNames[ kind ]; -
tests/Makefile.am
r342af53 r4468a70 103 103 104 104 mostlyclean-local : 105 find ${builddir} -not -path './__pycache__/*' -path '*.o' -delete106 find ${builddir} -not -path './__pycache__/*' -path '*/.err/*.log' -delete107 find ${builddir} -not -path './__pycache__/*' -path '*/.out/*.log' -delete108 105 rm -f ${EXTRA_PROGRAMS} 109 106 rm -rf __pycache__ 107 find ${builddir} -path '*.o' -delete 108 find ${builddir} -path '*/.err/*.log' -delete 109 find ${builddir} -path '*/.out/*.log' -delete 110 110 111 111 distclean-local : -
tests/concurrent/futures/.expect/basic.txt
r342af53 r4468a70 1 start2 1 done -
tests/concurrent/futures/basic.cfa
r342af53 r4468a70 1 1 #include <thread.hfa> 2 3 2 enum {NFUTURES = 10}; 4 3 … … 84 83 85 84 int main() { 86 printf( "start\n" ); // non-empty .expect file87 85 processor procs[2]; 88 86 {
Note:
See TracChangeset
for help on using the changeset viewer.