Changeset 342af53
- Timestamp: Jan 14, 2021, 12:23:14 PM (4 years ago)
- Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast-unique-expr, pthread-emulation, qualifiedEnum
- Children: 8e4aa05
- Parents: 4468a70 (diff), ec19b21 (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
- Files:
  - 39 added
  - 2 deleted
  - 45 edited
-
Jenkins/FullBuild
r4468a70 r342af53 108 108 string(name: 'GitRef', value: commitId), \ 109 109 string(name: 'Build' , value: buildNum) \ 110 ] 110 ], \ 111 propagate: false 111 112 112 113 echo(result.result) -
benchmark/creation/node_cor.js
r4468a70 r342af53 6 6 function * coroutine() { yield } 7 7 8 for ( var i = 0; i < times; i += 1 ) { // warm jit8 for ( var i = 0; i < times; i += 1 ) { // warm JIT 9 9 cor = coroutine() 10 10 } -
benchmark/ctxswitch/node_cor.js
r4468a70 r342af53 11 11 cor = coroutine() 12 12 13 for ( var i = 0; i < times; i += 1 ) { // warm git13 for ( var i = 0; i < times; i += 1 ) { // warm JIT 14 14 cor.next(); 15 15 } -
benchmark/io/http/Makefile.am
r4468a70 r342af53 29 29 EXTRA_PROGRAMS = httpforall .dummy_hack 30 30 31 CLEANFILES = httpforall 32 31 33 nodist_httpforall_SOURCES = \ 32 34 filecache.cfa \ -
benchmark/io/http/main.cfa
r4468a70 r342af53 46 46 } 47 47 48 extern void init_protocol(void); 49 extern void deinit_protocol(void); 50 48 51 //============================================================================================= 49 52 // Main … … 61 64 //=================== 62 65 // Open Socket 63 printf(" Listening on port %d\n", options.socket.port);66 printf("%ld : Listening on port %d\n", getpid(), options.socket.port); 64 67 int server_fd = socket(AF_INET, SOCK_STREAM, 0); 65 68 if(server_fd < 0) { … … 79 82 ret = bind( server_fd, (struct sockaddr *)&address, sizeof(address) ); 80 83 if(ret < 0) { 81 if(errno == 98) {84 if(errno == EADDRINUSE) { 82 85 if(waited == 0) { 83 86 printf("Waiting for port\n"); … … 109 112 options.clopts.instance = &cl; 110 113 114 111 115 int pipe_cnt = options.clopts.nworkers * 2; 112 116 int pipe_off; … … 124 128 { 125 129 ServerProc procs[options.clopts.nprocs]; 130 131 init_protocol(); 126 132 { 127 133 Worker workers[options.clopts.nworkers]; … … 151 157 printf("Shutting Down\n"); 152 158 } 159 160 for(i; options.clopts.nworkers) { 161 printf("Cancelling %p\n", (void*)workers[i].cancel.target); 162 workers[i].done = true; 163 cancel(workers[i].cancel); 164 } 165 166 printf("Shutting down socket\n"); 167 int ret = shutdown( server_fd, SHUT_RD ); 168 if( ret < 0 ) { abort( "shutdown error: (%d) %s\n", (int)errno, strerror(errno) ); } 169 170 //=================== 171 // Close Socket 172 printf("Closing Socket\n"); 173 ret = close( server_fd ); 174 if(ret < 0) { 175 abort( "close socket error: (%d) %s\n", (int)errno, strerror(errno) ); 176 } 153 177 } 154 178 printf("Workers Closed\n"); 179 180 deinit_protocol(); 155 181 } 156 182 … … 162 188 } 163 189 free(fds); 164 }165 190 166 //===================167 // Close Socket168 printf("Closing Socket\n");169 ret = close( server_fd );170 if(ret < 0) {171 abort( "close socket error: (%d) %s\n", (int)errno, strerror(errno) );172 191 } 173 192 -
benchmark/io/http/options.cfa
r4468a70 r342af53 12 12 #include <parseargs.hfa> 13 13 14 #include <string.h> 15 14 16 Options options @= { 17 false, // log 18 15 19 { // file_cache 16 20 0, // open_flags; … … 48 52 {'p', "port", "Port the server will listen on", options.socket.port}, 49 53 {'c', "cpus", "Number of processors to use", options.clopts.nprocs}, 54 {'L', "log", "Enable logs", options.log, parse_settrue}, 50 55 {'t', "threads", "Number of worker threads to use", options.clopts.nworkers}, 51 56 {'b', "accept-backlog", "Maximum number of pending accepts", options.socket.backlog}, -
benchmark/io/http/options.hfa
r4468a70 r342af53 8 8 9 9 struct Options { 10 bool log; 11 10 12 struct { 11 13 int open_flags; -
benchmark/io/http/protocol.cfa
r4468a70 r342af53 18 18 #include "options.hfa" 19 19 20 const char * volatile date = 0p; 21 20 22 const char * http_msgs[] = { 21 "HTTP/1.1 200 OK\n Content-Type: text/plain\nContent-Length: %zu\n\n",22 "HTTP/1.1 400 Bad Request\n Content-Type: text/plain\nContent-Length: 0\n\n",23 "HTTP/1.1 404 Not Found\n Content-Type: text/plain\nContent-Length: 0\n\n",24 "HTTP/1.1 413 Payload Too Large\n Content-Type: text/plain\nContent-Length: 0\n\n",25 "HTTP/1.1 414 URI Too Long\n Content-Type: text/plain\nContent-Length: 0\n\n",23 "HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu \n\n", 24 "HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n", 25 "HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n", 26 "HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n", 27 "HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n", 26 28 }; 27 29 … … 45 47 while(len > 0) { 46 48 // Call write 47 int ret = write(fd, it, len); 49 int ret = cfa_write(fd, it, len, 0, -1`s, 0p, 0p); 50 // int ret = write(fd, it, len); 48 51 if( ret < 0 ) { if( errno != EAGAIN && errno != EWOULDBLOCK) abort( "'answer error' error: (%d) %s\n", (int)errno, strerror(errno) ); } 49 52 … … 63 66 int answer_header( int fd, size_t size ) { 64 67 const char * fmt = http_msgs[OK200]; 65 int len = 100;68 int len = 200; 66 69 char buffer[len]; 67 len = snprintf(buffer, len, fmt, size);70 len = snprintf(buffer, len, fmt, date, size); 68 71 return answer( fd, buffer, len ); 69 72 } 70 73 71 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len) { 74 int answer_plain( int fd, char buffer[], size_t size ) { 75 int ret = answer_header(fd, size); 76 if( ret < 0 ) return ret; 77 return answer(fd, buffer, size); 78 } 79 80 int answer_empty( int fd ) { 81 return answer_header(fd, 0); 82 } 83 84 85 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation * cancel) { 72 86 char * it = buffer; 73 87 size_t count = len - 1; … … 75 89 READ: 76 90 for() { 77 int ret = cfa_read(fd, (void*)it, count, 0, -1`s, 0p, 0p); 91 int ret = cfa_read(fd, (void*)it, count, 0, -1`s, cancel, 0p); 92 // int ret = read(fd, (void*)it, count); 78 93 if(ret == 0 ) return [OK200, true, 0, 0]; 79 94 if(ret < 0 ) { 80 95 if( errno == EAGAIN || errno == EWOULDBLOCK) continue READ; 96 // if( errno == EINVAL ) return [E400, true, 0, 0]; 81 97 abort( "read error: (%d) %s\n", (int)errno, strerror(errno) ); 82 98 } … … 92 108 } 93 109 94 printf("%.*s\n", rlen, buffer);110 if( options.log ) printf("%.*s\n", rlen, buffer); 95 111 96 112 it = buffer; … … 104 120 105 121 void sendfile( int pipe[2], int fd, int ans_fd, size_t count ) { 122 unsigned sflags = SPLICE_F_MOVE; // | SPLICE_F_MORE; 106 123 off_t offset = 0; 107 124 ssize_t ret; 108 125 SPLICE1: while(count > 0) { 109 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p); 126 ret = cfa_splice(ans_fd, &offset, pipe[1], 0p, count, sflags, 0, -1`s, 0p, 0p); 127 // ret = splice(ans_fd, &offset, pipe[1], 0p, count, sflags); 110 128 if( ret < 0 ) { 111 129 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE1; … … 117 135 size_t in_pipe = ret; 118 136 SPLICE2: while(in_pipe > 0) { 119 ret = cfa_splice(pipe[0], 0p, fd, 
0p, in_pipe, SPLICE_F_MOVE | SPLICE_F_MORE, 0, -1`s, 0p, 0p); 137 ret = cfa_splice(pipe[0], 0p, fd, 0p, in_pipe, sflags, 0, -1`s, 0p, 0p); 138 // ret = splice(pipe[0], 0p, fd, 0p, in_pipe, sflags); 120 139 if( ret < 0 ) { 121 140 if( errno != EAGAIN && errno != EWOULDBLOCK) continue SPLICE2; … … 127 146 } 128 147 } 148 149 //============================================================================================= 150 151 #include <clock.hfa> 152 #include <time.hfa> 153 #include <thread.hfa> 154 155 struct date_buffer { 156 char buff[100]; 157 }; 158 159 thread DateFormater { 160 int idx; 161 date_buffer buffers[2]; 162 }; 163 164 void ?{}( DateFormater & this ) { 165 ((thread&)this){ "Server Date Thread", *options.clopts.instance }; 166 this.idx = 0; 167 memset( this.buffers[0].buff, 0, sizeof(this.buffers[0]) ); 168 memset( this.buffers[1].buff, 0, sizeof(this.buffers[1]) ); 169 } 170 171 void main(DateFormater & this) { 172 LOOP: for() { 173 waitfor( ^?{} : this) { 174 break LOOP; 175 } 176 or else {} 177 178 Time now = getTimeNsec(); 179 180 strftime( this.buffers[this.idx].buff, 100, "%a, %d %b %Y %H:%M:%S %Z", now ); 181 182 char * next = this.buffers[this.idx].buff; 183 __atomic_exchange_n((char * volatile *)&date, next, __ATOMIC_SEQ_CST); 184 this.idx = (this.idx + 1) % 2; 185 186 sleep(1`s); 187 } 188 } 189 190 //============================================================================================= 191 DateFormater * the_date_formatter; 192 193 void init_protocol(void) { 194 the_date_formatter = alloc(); 195 (*the_date_formatter){}; 196 } 197 198 void deinit_protocol(void) { 199 ^(*the_date_formatter){}; 200 free( the_date_formatter ); 201 } -
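The new DateFormater thread above amortizes the cost of building the HTTP Date header: it reformats the string once per second into one of two buffers and publishes the fresh buffer with an atomic pointer exchange, so concurrent readers of date never observe a partially written string. The following is a minimal standalone sketch of the same double-buffering pattern in plain C with pthreads rather than the CFA runtime (the buffer size, thread setup, and two-second demonstration delay are illustrative assumptions, not code from this changeset):

/* Sketch: one thread re-formats the HTTP Date header once per second and
 * publishes it with an atomic pointer swap; readers only ever see a
 * completely written buffer. Hypothetical example, not part of the changeset. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static char buffers[2][100];                 // two alternating buffers
static _Atomic(const char *) date = NULL;    // readers load this pointer

static void * date_formatter( void * arg ) {
	(void)arg;
	for ( int idx = 0;; idx = (idx + 1) % 2 ) {
		time_t now = time( NULL );
		struct tm tm;
		gmtime_r( &now, &tm );
		strftime( buffers[idx], sizeof(buffers[idx]),
		          "%a, %d %b %Y %H:%M:%S %Z", &tm );
		atomic_exchange( &date, buffers[idx] );   // publish the freshly written buffer
		sleep( 1 );
	}
	return NULL;
}

int main( void ) {                           // reader side of the sketch
	pthread_t tid;
	pthread_create( &tid, NULL, date_formatter, NULL );
	sleep( 2 );                              // by now the formatter has published at least once
	printf( "Date: %s\n", atomic_load( &date ) );
	return 0;
}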
benchmark/io/http/protocol.hfa
r4468a70 r342af53 1 1 #pragma once 2 3 struct io_cancellation; 2 4 3 5 enum HttpCode { … … 14 16 int answer_error( int fd, HttpCode code ); 15 17 int answer_header( int fd, size_t size ); 18 int answer_plain( int fd, char buffer [], size_t size ); 19 int answer_empty( int fd ); 16 20 17 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len );21 [HttpCode code, bool closed, * const char file, size_t len] http_read(int fd, []char buffer, size_t len, io_cancellation *); 18 22 19 23 void sendfile( int pipe[2], int fd, int ans_fd, size_t count ); -
benchmark/io/http/worker.cfa
r4468a70 r342af53 19 19 this.pipe[0] = -1; 20 20 this.pipe[1] = -1; 21 this.done = false; 22 } 23 24 extern "C" { 25 extern int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags); 21 26 } 22 27 … … 28 33 CONNECTION: 29 34 for() { 30 int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, 0p, 0p ); 35 if( options.log ) printf("=== Accepting connection ===\n"); 36 int fd = cfa_accept4( this.[sockfd, addr, addrlen, flags], 0, -1`s, &this.cancel, 0p ); 37 // int fd = accept4( this.[sockfd, addr, addrlen, flags] ); 31 38 if(fd < 0) { 32 39 if( errno == ECONNABORTED ) break; 40 if( errno == EINVAL && this.done ) break; 33 41 abort( "accept error: (%d) %s\n", (int)errno, strerror(errno) ); 34 42 } 35 43 36 printf("New connection %d, waiting for requests\n", fd);44 if( options.log ) printf("=== New connection %d, waiting for requests ===\n", fd); 37 45 REQUEST: 38 46 for() { … … 45 53 size_t len = options.socket.buflen; 46 54 char buffer[len]; 47 printf("Reading request\n");48 [code, closed, file, name_size] = http_read(fd, buffer, len );55 if( options.log ) printf("=== Reading request ===\n"); 56 [code, closed, file, name_size] = http_read(fd, buffer, len, &this.cancel); 49 57 50 58 // if we are done, break out of the loop 51 59 if( closed ) { 52 printf("Connection closed\n"); 60 if( options.log ) printf("=== Connection closed ===\n"); 61 close(fd); 53 62 continue CONNECTION; 54 63 } … … 56 65 // If this wasn't a request retrun 400 57 66 if( code != OK200 ) { 58 printf(" Invalid Request : %d\n", code_val(code));67 printf("=== Invalid Request : %d ===\n", code_val(code)); 59 68 answer_error(fd, code); 60 69 continue REQUEST; 61 70 } 62 71 63 printf("Request for file %.*s\n", (int)name_size, file); 72 if(0 == strncmp(file, "plaintext", min(name_size, sizeof("plaintext") ))) { 73 if( options.log ) printf("=== Request for /plaintext ===\n"); 74 75 char text[] = "Hello, World!\n"; 76 77 // Send the header 78 answer_plain(fd, text, sizeof(text)); 79 80 if( options.log ) printf("=== Answer sent ===\n"); 81 continue REQUEST; 82 } 83 84 if(0 == strncmp(file, "ping", min(name_size, sizeof("ping") ))) { 85 if( options.log ) printf("=== Request for /ping ===\n"); 86 87 // Send the header 88 answer_empty(fd); 89 90 if( options.log ) printf("=== Answer sent ===\n"); 91 continue REQUEST; 92 } 93 94 if( options.log ) printf("=== Request for file %.*s ===\n", (int)name_size, file); 64 95 65 96 // Get the fd from the file cache … … 70 101 // If we can't find the file, return 404 71 102 if( ans_fd < 0 ) { 72 printf(" File Not Found\n");103 printf("=== File Not Found ===\n"); 73 104 answer_error(fd, E404); 74 105 continue REQUEST; … … 81 112 sendfile( this.pipe, fd, ans_fd, count); 82 113 83 printf("File sent\n");114 if( options.log ) printf("=== Answer sent ===\n"); 84 115 } 85 116 } -
benchmark/io/http/worker.hfa
r4468a70 r342af53 17 17 socklen_t * addrlen; 18 18 int flags; 19 io_cancellation cancel; 20 volatile bool done; 19 21 }; 20 22 void ?{}( Worker & this); -
benchmark/io/readv.cfa
r4468a70 r342af53 96 96 97 97 char **left; 98 parse_args( opt, opt_cnt, "[OPTIONS]...\ncforall yieldbenchmark", left );98 parse_args( opt, opt_cnt, "[OPTIONS]...\ncforall readv benchmark", left ); 99 99 100 100 if(kpollcp || odirect) { 101 101 if( (buflen % 512) != 0 ) { 102 102 fprintf(stderr, "Buffer length must be a multiple of 512 when using O_DIRECT, was %lu\n\n", buflen); 103 print_args_usage(opt, opt_cnt, "[OPTIONS]...\ncforall yieldbenchmark", true);103 print_args_usage(opt, opt_cnt, "[OPTIONS]...\ncforall readv benchmark", true); 104 104 } 105 105 } -
doc/bibliography/pl.bib
r4468a70 r342af53 990 990 } 991 991 992 @techreport{cfa-cc, 993 keywords = {Cforall, cfa-cc, transpiler}, 994 contributer = {pabuhr@plg}, 995 title = {{\textsf{cfa-cc}} Developer's Reference Manual}, 996 author = {Fangren Yu}, 997 institution = {School of Computer Science}, 998 address = {University of Waterloo, Waterloo, Ontario, Canada}, 999 month = aug, 1000 year = {2020}, 1001 note = {\href{https://cforall.uwaterloo.ca/doc/Fangren_Yu_Report_S20.pdf}{https://\-cforall.uwaterloo.ca/\-doc/\-Fangren\_Yu\_Report\_S20.pdf}}, 1002 } 1003 992 1004 @article{Moss18, 993 1005 keywords = {type systems, polymorphism, tuples, Cforall}, … … 1040 1052 keywords = {type system, generic type, resolution algorithm, type environment, Cforall}, 1041 1053 author = {Aaron Moss}, 1042 title = {\textsf{C} $\mathbf{\forall}$ Type System Implementation},1054 title = {\textsf{C}\,$\mathbf{\forall}$ Type System Implementation}, 1043 1055 school = {School of Computer Science, University of Waterloo}, 1044 1056 year = 2019, … … 1161 1173 keywords = {ctrie, concurrent map}, 1162 1174 contributer = {a3moss@uwaterloo.ca}, 1163 title = {Cache-aware lock-free concurrent hash tries},1164 author = {Prokopec, Aleksandar and Bagwell, Phil and Odersky, Martin},1165 institution = {EPFL},1166 year = {2011}1175 title = {Cache-aware lock-free concurrent hash tries}, 1176 author = {Prokopec, Aleksandar and Bagwell, Phil and Odersky, Martin}, 1177 institution = {EPFL}, 1178 year = {2011} 1167 1179 } 1168 1180 … … 3928 3940 school = {School of Computer Science, University of Waterloo}, 3929 3941 year = 2003, 3930 address = {Waterloo, Ontario, Canada, N2L 3G1},3942 optaddress = {Waterloo, Ontario, Canada, N2L 3G1}, 3931 3943 note = {\href{http://plg.uwaterloo.ca/theses/BilsonThesis.pdf}{http://\-plg.uwaterloo.ca/\-theses/\-BilsonThesis.pdf}}, 3932 3944 } … … 5210 5222 } 5211 5223 5224 @manual{gcc-nested-func, 5225 keywords = {gcc nested functions}, 5226 contributer = {pabuhr@plg}, 5227 key = {gcc nested functions}, 5228 title = {Nested Functions}, 5229 organization= {{gcc} 9.3 Manual}, 5230 year = 2019, 5231 note = {\href{https://gcc.gnu.org/onlinedocs/gcc-9.3.0/gcc/Nested-Functions.html}{https://\-gcc.gnu.org/\-onlinedocs/\-gcc-9.3.0/\-gcc/\-Nested-Functions.html}}, 5232 } 5233 5212 5234 @article{Haddon77, 5213 5235 keywords = {monitors, nested monitor calls}, … … 7413 7435 } 7414 7436 7437 @manual{POSIX17, 7438 keywords = {POSIX, Standard}, 7439 contributer = {pabuhr@plg}, 7440 key = {POSIX}, 7441 title = {1003.1 Standard for Information Technology -- Portable Operating System Interface (POSIX), Base Specifications, Issue 7}, 7442 organization= {IEEE and The Open Group}, 7443 year = 2017, 7444 note = {\href{https://pubs.opengroup.org/onlinepubs/9699919799}{https://\-pubs.opengroup.org/\-onlinepubs/\-9699919799}}, 7445 } 7446 7415 7447 @inproceedings{ML:NJ, 7416 7448 keywords = {continuations, ML}, -
doc/theses/andrew_beach_MMath/existing.tex
r4468a70 r342af53 83 83 the the call site. 84 84 85 As an example, even if no function named \codeCFA{do \_once} is not defined86 near the definition of \codeCFA{do \_twice} the following code will work.85 As an example, even if no function named \codeCFA{do_once} is not defined 86 near the definition of \codeCFA{do_twice} the following code will work. 87 87 \begin{lstlisting} 88 88 int quadruple(int x) { … … 95 95 \end{lstlisting} 96 96 This is not the recommended way to implement a quadruple function but it 97 does work. The complier will deduce that \codeCFA{do \_twice}'s T is an97 does work. The complier will deduce that \codeCFA{do_twice}'s T is an 98 98 integer from the argument. It will then look for a definition matching the 99 assertion which is the \codeCFA{do \_once} defined within the function. That100 function will be passed in as a function pointer to \codeCFA{do \_twice} and99 assertion which is the \codeCFA{do_once} defined within the function. That 100 function will be passed in as a function pointer to \codeCFA{do_twice} and 101 101 called within it. 102 102 … … 156 156 In \CFA coroutines are created using the \codeCFA{coroutine} keyword which 157 157 works just like \codeCFA{struct} except that the created structure will be 158 modified by the compiler to satify the \codeCFA{is \_coroutine} trait.158 modified by the compiler to satify the \codeCFA{is_coroutine} trait. 159 159 160 160 These structures act as the interface between callers and the coroutine, -
doc/theses/fangren_yu_COOP_S20/Report.tex
r4468a70 r342af53 56 56 57 57 \title{\Huge 58 cfa-ccDeveloper's Reference58 \lstinline|cfa-cc| Developer's Reference 59 59 }% title 60 60 … … 728 728 The \CFA compiler sets a limit on assertion depth and reports an error if assertion resolution does not terminate within the limit (as for \lstinline[language=C++]@templates@ in \CC). 729 729 730 \addcontentsline{toc}{section}{\refname} 730 731 \bibliographystyle{plain} 731 732 \bibliography{pl} -
doc/theses/thierry_delisle_PhD/thesis/Makefile
r4468a70 r342af53 8 8 BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex 9 9 10 MAKEFLAGS = --no-print-directory --silent #10 MAKEFLAGS = --no-print-directory --silent 11 11 VPATH = ${Build} ${Figures} 12 12 … … 66 66 67 67 build/%.dvi : %.tex Makefile | ${Build} 68 # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run. 69 if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi 68 70 # Must have *.aux file containing citations for bibtex 69 71 if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} $< ; fi … … 74 76 # Make index from *.aux entries and input index at end of document 75 77 -makeglossaries -q -s ${basename $@}.ist ${basename $@} 78 # Make index from *.aux entries and input index at end of document 79 -makeindex ${basename $@}.idx 76 80 # Run again to finish citations 77 81 ${LaTeX} $< -
doc/theses/thierry_delisle_PhD/thesis/text/core.tex
r4468a70 r342af53 1 1 \chapter{Scheduling Core}\label{core} 2 2 3 Before discussing scheduling in general, where it is important to address systems that are changing states, this document discusses scheduling in a somewhat ideal scen erio, where the system has reached a steady state. For this purpose, a steady state is loosely defined as a state where there are always \glspl{thrd} ready to run and the system has the ressources necessary to accomplish the work, \eg, enough workers. In short, the system is neither overloaded nor underloaded.3 Before discussing scheduling in general, where it is important to address systems that are changing states, this document discusses scheduling in a somewhat ideal scenario, where the system has reached a steady state. For this purpose, a steady state is loosely defined as a state where there are always \glspl{thrd} ready to run and the system has the resources necessary to accomplish the work, \eg, enough workers. In short, the system is neither overloaded nor underloaded. 4 4 5 I believe it is important to discuss the steady state first because it is the easiest case to handle and, relatedly, the case in which the best performance is to be expected. As such, when the system is either overloaded or underloaded, a common approach is to try to adapt the system to th e new load and return to the steady state, \eg, adding or removing workers. Flaws in the scheduling when the system is in the steady state can thereforeto be pervasive in all states.5 I believe it is important to discuss the steady state first because it is the easiest case to handle and, relatedly, the case in which the best performance is to be expected. As such, when the system is either overloaded or underloaded, a common approach is to try to adapt the system to this new load and return to the steady state, \eg, by adding or removing workers. Therefore, flaws in scheduling the steady state can to be pervasive in all states. 6 6 7 7 \section{Design Goals} 8 As with most of the design decisions behind \CFA, an important goal is to match the expectation of the programmer , according to their probable mental model. To match these expectations, the design must offer the programmers sufficient guarantees so that, as long as they respect the mental model, the system will also respectthis model.8 As with most of the design decisions behind \CFA, an important goal is to match the expectation of the programmer according to their execution mental-model. To match expectations, the design must offer the programmer sufficient guarantees so that, as long as they respect the execution mental-model, the system also respects this model. 9 9 10 For threading, a simple and common mentalmodel is the ``Ideal multi-tasking CPU'' :10 For threading, a simple and common execution mental-model is the ``Ideal multi-tasking CPU'' : 11 11 12 12 \begin{displayquote}[Linux CFS\cit{https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt}] 13 13 {[The]} ``Ideal multi-tasking CPU'' is a (non-existent :-)) CPU that has 100\% physical power and which can run each task at precise equal speed, in parallel, each at [an equal fraction of the] speed. For example: if there are 2 tasks running, then it runs each at 50\% physical power --- i.e., actually in parallel. 14 \label{q:LinuxCFS} 14 15 \end{displayquote} 15 16 16 17 Applied to threads, this model states that every ready \gls{thrd} immediately runs in parallel with all other ready \glspl{thrd}. 
While a strict implementation of this model is not feasible, programmers still have expectations about scheduling that come from this model. 17 18 18 In general, the expectation at the center of this model is that ready \glspl{thrd} do not interfere with each other but simply share the hardware. This makes it easier to reason about threading because ready \glspl{thrd} can be takenin isolation and the effect of the scheduler can be virtually ignored. This expectation of \gls{thrd} independence means the scheduler is expected to offer two guarantees:19 In general, the expectation at the center of this model is that ready \glspl{thrd} do not interfere with each other but simply share the hardware. This assumption makes it easier to reason about threading because ready \glspl{thrd} can be thought of in isolation and the effect of the scheduler can be virtually ignored. This expectation of \gls{thrd} independence means the scheduler is expected to offer two guarantees: 19 20 \begin{enumerate} 20 \item A fairness guarantee: a \gls{thrd} that is ready to run will not be prevented to do soby another thread.21 \item A performance guarantee: a \gls{thrd} that wants to start or stop running will not be slowed downby other threads wanting to do the same.21 \item A fairness guarantee: a \gls{thrd} that is ready to run is not prevented by another thread. 22 \item A performance guarantee: a \gls{thrd} that wants to start or stop running is not prevented by other threads wanting to do the same. 22 23 \end{enumerate} 23 24 24 It is important to note that these guarantees are expected only up to a point. \Glspl{thrd} that are ready to run should not be prevented to do so, but they still need to share a limited amount of hardware. Therefore, the guarantee is considered respected if a \gls{thrd} gets access to a \emph{fair share} of the hardware, even if that share is very small.25 It is important to note that these guarantees are expected only up to a point. \Glspl{thrd} that are ready to run should not be prevented to do so, but they still share the limited hardware resources. Therefore, the guarantee is considered respected if a \gls{thrd} gets access to a \emph{fair share} of the hardware resources, even if that share is very small. 25 26 26 Similarly the performance guarantee, the lack of interfer ance between threads, is only relevant up to a point. Ideally the cost of running and blocking would be constant regardless of contention, but the guarantee is considered satisfied if the cost is not \emph{too high} with or without contention. How much is an acceptable cost is obviously highly variable. For this document the performance experimentation will attempt to show that the cost of scheduling is at worst equivalent to existing algorithms used in popular languages. This demonstration can be made by comparing application built in \CFA to applications built with other languages or other models. Recall from a few paragraphs ago that the expectation of programmers is that the impact of the scheduler can be ignored. Therefore, if the cost of scheduling is equivalent or lower to other popular languages, I willconsider the guarantee achieved.27 Similarly the performance guarantee, the lack of interference among threads, is only relevant up to a point. Ideally, the cost of running and blocking should be constant regardless of contention, but the guarantee is considered satisfied if the cost is not \emph{too high} with or without contention. How much is an acceptable cost is obviously highly variable. 
For this document, the performance experimentation attempts to show the cost of scheduling is at worst equivalent to existing algorithms used in popular languages. This demonstration can be made by comparing applications built in \CFA to applications built with other languages or other models. Recall programmer expectation is that the impact of the scheduler can be ignored. Therefore, if the cost of scheduling is equivalent to or lower than other popular languages, I consider the guarantee achieved. 27 28 28 29 More precisely the scheduler should be: 29 30 \begin{itemize} 30 31 \item As fast as other schedulers that are less fair. 31 \item Faster than other scheduler that have equal or better fairness.32 \item Faster than other schedulers that have equal or better fairness. 32 33 \end{itemize} 33 34 34 35 \subsection{Fairness vs Scheduler Locality} 35 An important performance factor in modern architectures is cache locality. Waiting for data not present in the cache can have a major impact on performance, and having multiple \glspl{hthrd} writing to the same cache lines can lead to cache lines that need to be waited on again. It is therefore preferable to divide the data among each \gls{hthrd}\footnote{Thiscan be an explicit division up front or using data structures where different \glspl{hthrd} are naturally routed to different cache lines.}.36 An important performance factor in modern architectures is cache locality. Waiting for data at lower levels or not present in the cache can have a major impact on performance. Having multiple \glspl{hthrd} writing to the same cache lines also leads to cache lines that must be waited on. It is therefore preferable to divide data among each \gls{hthrd}\footnote{This partitioning can be an explicit division up front or using data structures where different \glspl{hthrd} are naturally routed to different cache lines.}. 36 37 37 For a scheduler, having good locality\footnote{This section discusses \emph{internal } locality, \ie, the locality of the data used by the scheduler. \emph{External locality}, \ie, how the data used by the application is affected by scheduling, is a much more complicated subject and will be discussed in the chapters on evaluation.}, \ie, having the data be local to each \gls{hthrd}, generally conflicts with fairness. Indeed, good locality often requires avoiding the movement of cache lines, while fairness requires dynamically moving \gls{thrd}, and as a consequence cache lines, to \glspl{hthrd} that are currently more appropriate.38 For a scheduler, having good locality\footnote{This section discusses \emph{internal locality}, \ie, the locality of the data used by the scheduler versus \emph{external locality}, \ie, how the data used by the application is affected by scheduling. External locality is a much more complicated subject and is discussed in part~\ref{Evaluation} on evaluation.}, \ie, having the data local to each \gls{hthrd}, generally conflicts with fairness. Indeed, good locality often requires avoiding the movement of cache lines, while fairness requires dynamically moving a \gls{thrd}, and as consequence cache lines, to a \gls{hthrd} that is currently available. 38 39 39 However, I claim that in practice it is possible to strike a balance between fairness and performance because the need for these do not necessarily overlap temporaly. Figure~\ref{fig:fair} shows an visual representation of this behaviour. 
As mentionned, a little bit of unfairness can be acceptable, therefore it can be desirable to have an algorithm that prioritizes cache locality as long as no threads is left behind for too long.40 However, I claim that in practice it is possible to strike a balance between fairness and performance because these goals do not necessarily overlap temporally, where Figure~\ref{fig:fair} shows a visual representation of this behaviour. As mentioned, some unfairness is acceptable; therefore it is desirable to have an algorithm that prioritizes cache locality as long as thread delay does not exceed the execution mental-model. 40 41 41 42 \begin{figure} 42 \ begin{center}43 44 \ end{center}45 \caption {Fairness vs Locality}43 \centering 44 \input{fairness.pstex_t} 45 \vspace*{-10pt} 46 \caption[Fairness vs Locality graph]{Rule of thumb Fairness vs Locality graph \smallskip\newline The importance of Fairness and Locality while a ready \gls{thrd} awaits running is shown as the time the ready \gls{thrd} waits increases, Ready Time, the chances that its data is still in cache, Locality, decreases. At the same time, the need for fairness increases since other \glspl{thrd} may have the chance to run many times, breaking the fairness model. Since the actual values and curves of this graph can be highly variable, the graph is an idealized representation of the two opposing goals.} 46 47 \label{fig:fair} 47 Rule of thumb graph: Importance of Fairness and Locality while a ready \gls{thrd} waits run.48 As the time a ready \gls{thrd} waits increases, ``Ready Time'', the chances that its data is still in cache decreases. At the same time, the need for fairness increases since other \glspl{thrd} may have the chance to run many times, breaking the fairness model mentionned above. Since the actual values and curves of this graph can be highly variable, the graph is left intentionally fuzzy and innacurate.49 48 \end{figure} 50 49 51 50 \section{Design} 52 A naive strictly \glsxtrshort{fifo} ready-queue does not offer sufficient performance. As shown in the evaluation sections, most production schedulers scale when adding multiple \glspl{hthrd} and that is not possible with a single point of contention. Therefore it is vital to shard the ready-queue so that multiple \glspl{hthrd} can access the ready-queue without performance degradation.51 In general, a na\"{i}ve \glsxtrshort{fifo} ready-queue does not scale with increased parallelism from \glspl{hthrd}, resulting in decreased performance. The problem is adding/removing \glspl{thrd} is a single point of contention. As shown in the evaluation sections, most production schedulers do scale when adding \glspl{hthrd}. The common solution to the single point of contention is to shard the ready-queue so each \gls{hthrd} can access the ready-queue without contention, increasing performance though lack of contention. 53 52 54 53 \subsection{Sharding} \label{sec:sharding} 55 An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm represents a queue with relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each cell of the array contains a linked-list with a lock and each node in these list is marked with a timestamp indicating when they were added to the list. Push operations are done by picking a random cell and attempting to push to its list. If the cell is already locked, the operation is simply retried on a new cell until a lock is acquired. 
Pop operations are done in a similar fashion except two random cells are picked. If both cells are not already locked and both cells contain non-empty lists, the operation pops the node with the oldest timestamp. If only one of the cell is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two newcells and tries again.54 An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm presents a queue with a relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each \emph{cell} of the array has a timestamp for the last operation and a pointer to a linked-list with a lock and each node in the list is marked with a timestamp indicating when it is added to the list. A push operation is done by picking a random cell, acquiring the list lock, and pushing to the list. If the cell is locked, the operation is simply retried on another random cell until a lock is acquired. A pop operation is done in a similar fashion except two random cells are picked. If both cells are unlocked with non-empty lists, the operation pops the node with the oldest cell timestamp. If one of the cells is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new random cells and tries again. 56 55 57 56 \begin{figure} 58 \begin{center} 59 \input{base.pstex_t} 60 \end{center} 61 \caption{Relaxed FIFO list} 57 \centering 58 \input{base.pstex_t} 59 \caption[Relaxed FIFO list]{Relaxed FIFO list \smallskip\newline List at the base of the scheduler: an array of strictly FIFO lists. The timestamp is in all nodes and cell arrays.} 62 60 \label{fig:base} 63 List at the base of the scheduler: an array of strictly FIFO lists.64 The timestamp is in all nodes and cell arrays.65 61 \end{figure} 66 62 67 63 \subsection{Finding threads} 68 Once threads have been distributed onto multiple queues, indentifying which queues are empty and which are not can become a problem. Indeed, if the number of \glspl{thrd} does not far exceed the number of queues, it is probable that several of these queues are empty. Figure~\ref{fig:empty} shows an example with 2 \glspl{thrd} running on 8 queues, where the chances of getting an empty queue is 75\% per pick, meaning two random picks yield a \gls{thrd} only half the time. 69 64 Once threads have been distributed onto multiple queues, identifying empty queues becomes a problem. Indeed, if the number of \glspl{thrd} does not far exceed the number of queues, it is probable that several of the cell queues are empty. Figure~\ref{fig:empty} shows an example with 2 \glspl{thrd} running on 8 queues, where the chances of getting an empty queue is 75\% per pick, meaning two random picks yield a \gls{thrd} only half the time. This scenario leads to performance problems since picks that do not yield a \gls{thrd} are not useful and do not necessarily help make more informed guesses. 
70 65 71 66 \begin{figure} 72 \begin{center} 73 \input{empty.pstex_t} 74 \end{center} 75 \caption{``More empty'' Relaxed FIFO list} 67 \centering 68 \input{empty.pstex_t} 69 \caption[``More empty'' Relaxed FIFO list]{``More empty'' Relaxed FIFO list \smallskip\newline Emptier state of the queue: the array contains many empty cells, that is strictly FIFO lists containing no elements.} 76 70 \label{fig:empty} 77 Emptier state of the queue: the array contains many empty cells, that is strictly FIFO lists containing no elements.78 71 \end{figure} 79 72 80 Th is can lead to performance problems since picks that do not yield a \gls{thrd} are not useful and do not necessarily help make more informed guesses.73 There are several solutions to this problem, but they ultimately all have to encode if a cell has an empty list. My results show the density and locality of this encoding is generally the dominating factor in these scheme. Classic solutions to this problem use one of three techniques to encode the information: 81 74 82 Solutions to this problem can take many forms, but they ultimately all have to encode where the threads are in some form. My results show that the density and locality of this encoding is generally the dominating factor in these scheme. Classic solutions to this problem use one of three techniques to encode the information: 75 \paragraph{Dense Information} Figure~\ref{fig:emptybit} shows a dense bitmask to identify the cell queues currently in use. This approach means processors can often find \glspl{thrd} in constant time, regardless of how many underlying queues are empty. Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow searching the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues. However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total amount of ready-queue sharding is limited to the number of bits in the word. With a multi-word bitmask, this maximum limit can be increased arbitrarily, but the look-up time increases. Finally, a dense bitmap, either single or multi-word, causes additional contention problems that reduces performance because of cache misses after updates. This central update bottleneck also means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available \glspl{thrd} but none on queue when the subsequent atomic check is done. 
83 76 84 77 \begin{figure} 85 \begin{center} 86 {\resizebox{0.73\textwidth}{!}{\input{emptybit.pstex_t}}} 87 \end{center} 78 \centering 88 79 \vspace*{-5pt} 89 \caption{Underloaded queue with added bitmask to indicate which array cells have items.} 80 {\resizebox{0.75\textwidth}{!}{\input{emptybit.pstex_t}}} 81 \vspace*{-5pt} 82 \caption[Underloaded queue with bitmask]{Underloaded queue with bitmask indicating array cells with items.} 90 83 \label{fig:emptybit} 91 \begin{center} 92 {\resizebox{0.73\textwidth}{!}{\input{emptytree.pstex_t}}}93 \end{center}84 85 \vspace*{10pt} 86 {\resizebox{0.75\textwidth}{!}{\input{emptytree.pstex_t}}} 94 87 \vspace*{-5pt} 95 \caption {Underloaded queue with added binary search tree indicate which array cells haveitems.}88 \caption[Underloaded queue with binary search-tree]{Underloaded queue with binary search-tree indicating array cells with items.} 96 89 \label{fig:emptytree} 97 \begin{center} 98 {\resizebox{0.9\textwidth}{!}{\input{emptytls.pstex_t}}}99 \end{center}90 91 \vspace*{10pt} 92 {\resizebox{0.95\textwidth}{!}{\input{emptytls.pstex_t}}} 100 93 \vspace*{-5pt} 101 \caption {Underloaded queue with added per processor bitmask to indicate which array cells haveitems.}94 \caption[Underloaded queue with per processor bitmask]{Underloaded queue with per processor bitmask indicating array cells with items.} 102 95 \label{fig:emptytls} 103 96 \end{figure} 104 97 105 \paragraph{ Dense Information} Figure~\ref{fig:emptybit} shows a dense bitmask to identify which inner queues are currently in use. This approach means processors can often find \glspl{thrd} in constant time, regardless of how many underlying queues are empty. Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow using the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues. However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word. With a multi-word bitmask, this maximum limit can be increased arbitrarily, but the look-up will nolonger be constant time. Finally, a dense bitmap, either single or multi-word, causes additional contention problems which reduces performance because of cache misses after updates. This central update bottleneck also means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available \glspl{thrd} but none on queue.98 \paragraph{Sparse Information} Figure~\ref{fig:emptytree} shows an approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}. However, this approach may lead to poorer performance due to the inherent pointer chasing cost while still allowing significant contention on the nodes of the tree if the tree is shallow. 106 99 107 \paragraph{ Sparse Information} Figure~\ref{fig:emptytree} shows an approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}. 
However, this approach may lead to poorer performance due to the inherent pointer chasing cost while still allowing more contention on the nodes of the tree if the tree is not deep enough.100 \paragraph{Local Information} Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but each \gls{hthrd} keeps its own independent copy. While this approach can offer good scalability \emph{and} low latency, the liveliness and discovery of the information can become a problem. This case is made worst in systems with few processors where even blind random picks can find \glspl{thrd} in a few tries. 108 101 109 \paragraph{Local Information} Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but have each thread keep its own independent copy of it. While this approach can offer good scalability \emph{and} low latency, the liveliness and discovery of the information can become a problem. This case is made worst in systems with few processors where even blind random picks can find \glspl{thrd} in few tries. 110 111 I built a prototype of these approach and none of these techniques offer satisfying performance when few threads are present. All of these approach hit the same 2 problems. First, blindly picking two sub-queues is very fast which means that any improvement to the hit rate can easily be countered by a slow-down in look-up speed. Second, the array is already as sharded as is needed to avoid contention bottlenecks, so any denser data structure will tend to become a bottleneck. In all cases, these factors meant that the best cases scenerio, many threads, would get worst throughput and the worst case scenario, few threads, would get a better hit rate, but an equivalent throughput. As a result I tried an entirely different approach. 102 I built a prototype of these approaches and none of these techniques offer satisfying performance when few threads are present. All of these approach hit the same 2 problems. First, randomly picking sub-queues is very fast but means any improvement to the hit rate can easily be countered by a slow-down in look-up speed when there are empty lists. Second, the array is already as sharded to avoid contention bottlenecks, so any denser data structure tends to become a bottleneck. In all cases, these factors meant the best cases scenario, \ie many threads, would get worst throughput, and the worst-case scenario, few threads, would get a better hit rate, but an equivalent poor throughput. As a result I tried an entirely different approach. 112 103 113 104 \subsection{Dynamic Entropy}\cit{https://xkcd.com/2318/} 114 In the worst case scenario there are few \glspl{thrd} ready to run, or more accuratly given $P$ \glspl{proc}, $T$ \glspl{thrd} and $\epsilon$, as usual, a very small number, in this case $\epsilon \ll P$, we have $T = P + \epsilon$. An important thing to note is that in this case, fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' presented in this chapter\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}. Therefore, in this context it is possible to use a purely internal locality based approach and still meet the fairness requirements. This approach would simply have each \gls{proc} running a single \gls{thrd} repeatedly. 
Or from the shared ready-queue viewpoint, each \gls{proc} would push to a given sub-queue and then pop from the \emph{same} subqueue. Ideally, the scheduler would achieve this without affecting the fairness guarantees in cases where $T \gg P$.105 In the worst-case scenario there are only few \glspl{thrd} ready to run, or more precisely given $P$ \glspl{proc}\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}, $T$ \glspl{thrd} and $\epsilon$ a very small number, than the worst case scenario can be represented by $\epsilon \ll P$, than $T = P + \epsilon$. It is important to note in this case that fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' on page \pageref{q:LinuxCFS}. In this context, it is possible to use a purely internal-locality based approach and still meet the fairness requirements. This approach simply has each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} pushes to a given sub-queue and then popes from the \emph{same} subqueue. In cases where $T \gg P$, the scheduler should also achieves similar performance without affecting the fairness guarantees. 115 106 116 To achieve this, I use a communication channel I have not mentionned yet and which I believe I use in a novel way : the \glsxtrshort{prng}. If the scheduler has a \glsxtrshort{prng} instance per \gls{proc} exclusively used for scheduling, its seed effectively encodes a list of all the accessed subqueues, from the latest to the oldest. The only requirement to achieve this is to be able to ``replay'' the \glsxtrshort{prng} backwards. As it turns out, this is an entirely reasonnable requirement and there already exist \glsxtrshort{prng}s that are fast, compact \emph{and} can be run forward and backwards. Linear congruential generators\cite{wiki:lcg} are an example of \glsxtrshort{prng}s that match these requirements.107 To handle this case, I use a pseudo random-number generator, \glsxtrshort{prng} in a novel way. When the scheduler uses a \glsxtrshort{prng} instance per \gls{proc} exclusively, the random-number seed effectively starts an encoding that produces a list of all accessed subqueues, from latest to oldest. The novel approach is to be able to ``replay'' the \glsxtrshort{prng} backwards and there exist \glsxtrshort{prng}s that are fast, compact \emph{and} can be run forward and backwards. Linear congruential generators~\cite{wiki:lcg} are an example of \glsxtrshort{prng}s that match these requirements. 117 108 118 The algorithm works as follows 109 The algorithm works as follows: 119 110 \begin{itemize} 120 111 \item Each \gls{proc} has two \glsxtrshort{prng} instances, $F$ and $B$. 121 \item Push and Pop operations happen as mentionned in Section~\ref{sec:sharding} with the following exceptions:112 \item Push and Pop operations occur as discussed in Section~\ref{sec:sharding} with the following exceptions: 122 113 \begin{itemize} 123 114 \item Push operations use $F$ going forward on each try and on success $F$ is copied into $B$. … … 126 117 \end{itemize} 127 118 128 The main benefit of this technique is that it basically re pects the desired properties of Figure~\ref{fig:fair}. When looking for work, \glspl{proc} will look first at the last cells they pushed to, if any, and then move backwards through the cells. As the \glspl{proc} continue looking for work, $F$ moves back and $B$ stays in place. 
As a resultthe relation between the two becomes weaker, which means that the probablisitic fairness of the algorithm reverts to normal. Chapter~\ref{proofs} discusses more formally the fairness guarantees of this algorithm.119 The main benefit of this technique is that it basically respects the desired properties of Figure~\ref{fig:fair}. When looking for work, a \gls{proc} first looks at the last cell they pushed to, if any, and then move backwards through its accessed cells. As the \gls{proc} continues looking for work, $F$ moves backwards and $B$ stays in place. As a result, the relation between the two becomes weaker, which means that the probablisitic fairness of the algorithm reverts to normal. Chapter~\ref{proofs} discusses more formally the fairness guarantees of this algorithm. 129 120 130 121 \section{Details} -
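The "Dynamic Entropy" scheme in the core.tex changes above hinges on a PRNG that can be stepped both forwards and backwards, and the text names linear congruential generators as a family that satisfies this requirement. Below is a small self-contained C sketch of such a reversible LCG driving the push/pop cell selection; the constants (Knuth's 64-bit MMIX multiplier and increment), the 8-cell array size, and the driver in main are illustrative assumptions, not code from the thesis:

/* Sketch: a 64-bit LCG stepped forwards for pushes and backwards for pops.
 * The backward step uses the modular inverse of the multiplier mod 2^64,
 * computed here by Newton iteration (valid because the multiplier is odd). */
#include <stdint.h>
#include <stdio.h>

static const uint64_t A = 6364136223846793005ULL;   // multiplier
static const uint64_t C = 1442695040888963407ULL;   // increment

static uint64_t inverse( uint64_t a ) {
	uint64_t x = a;                         // each iteration doubles the number of correct low bits
	for ( int i = 0; i < 5; i += 1 ) x *= 2 - a * x;
	return x;                               // a * x == 1 (mod 2^64)
}

static uint64_t next( uint64_t s ) { return A * s + C; }               // forward step
static uint64_t prev( uint64_t s ) { return inverse( A ) * (s - C); }  // backward step, prev(next(s)) == s

int main( void ) {
	uint64_t F = 42, B = 42;                // per-processor forward/backward seeds
	for ( int i = 0; i < 4; i += 1 ) {      // pushes advance F ...
		F = next( F );
		printf( "push -> cell %llu\n", (unsigned long long)(F % 8) );
	}
	B = F;                                  // ... and on success F is copied into B
	for ( int i = 0; i < 4; i += 1 ) {      // pops replay the same cells, newest first
		printf( "pop  <- cell %llu\n", (unsigned long long)(B % 8) );
		B = prev( B );
	}
	return 0;
}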
doc/theses/thierry_delisle_PhD/thesis/text/io.tex
r4468a70 r342af53 5 5 Since \glsxtrshort{io} operations are generally handled by the 6 6 7 \subsection{\ texttt{epoll}, \texttt{poll} and \texttt{select}}7 \subsection{\lstinline|epoll|, \lstinline|poll| and \lstinline|select|} 8 8 9 9 \subsection{Linux's AIO} … … 33 33 34 34 \subsection{\texttt{io\_uring}} 35 A very recent addition to Linux, \texttt{io\_uring}\cit{io\_uring} is a framework that aims to solve many of the problems listed with the above mentioned solutions.35 A very recent addition to Linux, @io_uring@\cit{io\_uring} is a framework that aims to solve many of the problems listed with the above mentioned solutions. 36 36 37 37 \subsection{Extra Kernel Threads}\label{io:morethreads} -
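The io.tex chapter above only stubs out its sections so far. For context, the submit/complete split that io_uring exposes, and that the runtime's user-level I/O must multiplex, looks roughly as follows when driven directly through liburing; this is a hypothetical standalone example, not part of the changeset, and the file name and queue depth are arbitrary:

/* Sketch: one read through io_uring via liburing — queue a request,
 * submit it to the kernel, then reap its completion. */
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>

int main( void ) {
	struct io_uring ring;
	io_uring_queue_init( 8, &ring, 0 );              // 8-entry submission/completion rings

	int fd = open( "/etc/hostname", O_RDONLY );
	char buf[256];

	struct io_uring_sqe * sqe = io_uring_get_sqe( &ring );
	io_uring_prep_read( sqe, fd, buf, sizeof(buf), 0 );   // describe the operation
	io_uring_submit( &ring );                              // hand it to the kernel

	struct io_uring_cqe * cqe;
	io_uring_wait_cqe( &ring, &cqe );                // block until the completion arrives
	printf( "read returned %d\n", cqe->res );
	io_uring_cqe_seen( &ring, cqe );

	io_uring_queue_exit( &ring );
	return 0;
}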
doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex
r4468a70 r342af53 1 1 \chapter{\CFA Runtime} 2 This chapter offers an overview of the capabilities of the \CFA runtime prior to this work.2 This chapter presents an overview of the capabilities of the \CFA runtime prior to this thesis work. 3 3 4 Threading in \CFA offers is based on \Gls{uthrding}, where \glspl{thrd} are the representation of a unit of work. As such, \CFA programmers should expect these units to be fairly inexpensive, that is: programmers should be able to create a large number of \glspl{thrd} and switch between \glspl{thrd} liberally without many concerns for performance.4 \Celeven introduced threading features, such the @_Thread_local@ storage class, and libraries @stdatomic.h@ and @threads.h@. Interestingly, almost a decade after the \Celeven standard, the most recent versions of gcc, clang, and msvc do not support the \Celeven include @threads.h@, indicating no interest in the C11 concurrency approach (possibly because of the recent effort to add concurrency to \CC). While the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}, as for \CC. This model uses \glspl{kthrd} to achieve parallelism and concurrency. In this model, every thread of computation maps to an object in the kernel. The kernel then has the responsibility of managing these threads, \eg creating, scheduling, blocking. This also entails that the kernel has a perfect view of every thread executing in the system\footnote{This is not completely true due to primitives like \lstinline|futex|es, which have a significant portion of their logic in user space.}. 5 5 6 6 \section{M:N Threading}\label{prev:model} 7 7 8 C traditionnally uses a 1:1 threading model. This model uses \glspl{kthrd} to achive parallelism and concurrency. In this model, every thread of computation maps to an object in the kernel. The kernel then has the responsibility of managing these threads, \eg creating, scheduling, blocking. This also entails that the kernel has a perfect view of every thread executing in the system\footnote{This is not completly true due to primitives like \texttt{futex}es, which have a significant portion of their logic in user space.}.8 Threading in \CFA is based on \Gls{uthrding}, where \glspl{thrd} are the representation of a unit of work. As such, \CFA programmers should expect these units to be fairly inexpensive, \ie programmers should be able to create a large number of \glspl{thrd} and switch among \glspl{thrd} liberally without many concerns for performance. 9 9 10 By contrast \CFA uses an M:N threading models, where concurrency is achieved using many user-level threads mapped onto fewer \glspl{kthrd}. The user-level threads have the same semantic meaning as a \glspl{kthrd} in the 1:1 model, they represent an independant thread of execution with it's on stack. The difference is that user-level threads do not have a corresponding object in the kernel, they are handled by the runtime in user space and scheduled onto \glspl{kthrd}, referred to as \glspl{proc} in this document. \Glspl{proc} run a \gls{thrd} until it context switches out, it then choses a different \gls{thrd} to run.10 The \CFA M:N threading models is implemented using many user-level threads mapped onto fewer \glspl{kthrd}. The user-level threads have the same semantic meaning as a \glspl{kthrd} in the 1:1 model: they represent an independent thread of execution with its own stack. 
The difference is that user-level threads do not have a corresponding object in the kernel, they are handled by the runtime in user space and scheduled onto \glspl{kthrd}, referred to as \glspl{proc} in this document. \Glspl{proc} run a \gls{thrd} until it context switches out, it then chooses a different \gls{thrd} to run. 11 11 12 12 \section{Clusters} 13 \CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} are only be scheduled onto \glspl{proc} in the same cluster and scheduling is done independently of other clusters. Figure~\ref{fig:system} shows an overview of the \CFA runtime, which allows programmers to tightly control parallelism. It also opens the door to handling effects like NUMA, by pining clusters to a specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}. 14 13 15 \begin{figure} 14 16 \begin{center} 15 17 \input{system.pstex_t} 16 18 \end{center} 17 \caption {Overview of the \CFA runtime}19 \caption[Overview of the \CFA runtime]{Overview of the \CFA runtime \newline \Glspl{thrd} are scheduled inside a particular cluster, where it only runs on the \glspl{proc} which belong to the cluster. The discrete-event manager, which handles preemption and timeout, is a \gls{kthrd} which lives outside any cluster and does not run \glspl{thrd}.} 18 20 \label{fig:system} 19 \Glspl{thrd} are scheduled inside a particular cluster, where it only runs on the \glspl{proc} which belong to the cluster. The discrete-event manager, which handles preemption and timeout, is a \gls{kthrd} which lives outside any cluster and does not run \glspl{thrd}.20 21 \end{figure} 21 \CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} will only be scheduled onto \glspl{proc} in the same cluster and scheduling is done independantly of other clusters. Figure~\ref{fig:system} shows an overview if this system. This allows programmers to control more tightly parallelism. It also opens the door to handling effects like NUMA, by pining clusters to specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}.22 22 23 23 \section{Scheduling} 24 The \CFA runtime was previously using a strictly \glsxtrshort{fifo} ready queue with a single lock. This setup offers perfect fairness in terms of opportunities to run/ However, it offers poor scalability, since the performance of the ready queue can never be improved by adding more \glspl{hthrd}, but thecontention can cause significant performance degradation.24 The \CFA runtime previously used a \glsxtrshort{fifo} ready-queue with a single lock. This setup offers perfect fairness in terms of opportunities to run. However, it offers poor scalability, since the performance of the ready queue can never be improved by adding more \glspl{hthrd} and contention can cause significant performance degradation. 25 25 26 26 \section{\glsxtrshort{io}}\label{prev:io} 27 Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. \CFA being built on C, this means that, while all the operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model\cit{pthreads}. 
Using these operations in a M:N threading model, when they are built for 1:1 threading, means that operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. This also means that deadlocks can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows: 27 Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. %\CFA being built on C, this means that, 28 While all I/O operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model~\cite{pthreads}. Using these 1:1 threading operations in an M:N threading model means I/O operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. It also means deadlock can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows: 29 \begin{quote} 30 Given a simple network program with 2 \glspl{thrd} and a single \gls{proc}, one \gls{thrd} sends network requests to a server and the other \gls{thrd} waits for a response from the server. If the second \gls{thrd} races ahead, it may wait for responses to requests that have not been sent yet. In theory, this should not be a problem, even if the second \gls{thrd} waits, because the first \gls{thrd} is still ready to run and should be able to get CPU time to send the request. With M:N threading, while the first \gls{thrd} is ready, the lone \gls{proc} \emph{cannot} run the first \gls{thrd} if it is blocked in the \glsxtrshort{io} operation of the second \gls{thrd}. If this happen, the system is in a synchronization deadlock\footnote{In this example, the deadlocked could be resolved if the server sends unprompted messages to the client. However, this solution is not general and may not be appropriate even in this simple case.}. 31 \end{quote} 32 Therefore, one of the objective of this work is to introduce \emph{User-Level \glsxtrshort{io}}, like \glslink{uthrding}{User-Level \emph{Threading}} blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations, which entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple I/O operations in parallel. This requirement cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its blocking duration. Executing I/O operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} does not block. 28 33 29 Given a simple network program with 2 \glspl{thrd} and a single \gls{proc}, one \gls{thrd} sends network requests to a server and the other \gls{thrd} waits for response from the server. If the second \gls{thrd} races ahead, it may wait for responses to requests that have not been sent yet. In theory, this should not be a problem, even if the second \gls{thrd} waits, the first \gls{thrd} is still ready to run and should just be able to get CPU time and send the request. 
In practice with M:N threading, while the first \gls{thrd} is ready, the lone \gls{proc} in this example will \emph{not} try to run the first \gls{thrd} if it is blocked in the \glsxtrshort{io} operation of the second \gls{thrd}. If this happen, the system is effectively deadlocked\footnote{In this example, the deadlocked could be resolved if the server sends unprompted messages to the client. However, this solution is not general and may not be appropriate even in this simple case.}. 34 \section{Interoperating with C} 35 While \glsxtrshort{io} operations are the classical example of operations that block \glspl{kthrd}, the non-blocking challenge extends to all blocking system-calls. The POSIX standard states~\cite[\S~2.9.1]{POSIX17}: 36 \begin{quote} 37 All functions defined by this volume of POSIX.1-2017 shall be thread-safe, except that the following functions1 need not be thread-safe. ... (list of 70+ potentially excluded functions) 38 \end{quote} 39 Only UNIX @man@ pages identify whether or not a library function is thread safe, and hence, may block on a pthread lock or system call; hence interoperability with UNIX library functions is a challenge for an M:N threading model. 30 40 31 One of the objective of this work, is to introduce \emph{User-Level \glsxtrshort{io}} which, as a parallel to \glslink{uthrding}{User-Level \emph{Threading}}, blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations. This entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple operations in parallel. This cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its duration. Executing operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} is not blocked.41 Languages like Go and Java, which have strict interoperability with C\cit{JNI, GoLang with C}, can control operations in C by ``sandboxing'' them, \eg a blocking function may be delegated to a \gls{kthrd}. Sandboxing may help towards guaranteeing that the kind of deadlock mentioned above does not occur. 32 42 33 \section{Interoperating with \texttt{C}} 34 While \glsxtrshort{io} operations are the classical example of operations that block \glspl{kthrd}, the challenges mentioned in the previous section do not require \glsxtrshort{io} to be involved. These challenges are a product of blocking system calls rather than \glsxtrshort{io}. C offers no tools to identify whether or not a librairy function will lead to a blocking system call. This fact means interoperatability with C becomes a challenge in a M:N threading model. 35 36 Languages like Go and Java, which have strict interoperatability with C\cit{JNI, GoLang with C}, can control operations in C by ``sandboxing'' them. They can, for example, delegate C operations to \glspl{kthrd} that are not \glspl{proc}. Sandboxing may help towards guaranteeing that the deadlocks mentioned in the previous section do not occur. 37 38 As mentioned in Section~\cit{\CFA intro}, \CFA is binary compatible with C and, as such, trivially supports calls to and from C librairies. Furthermore, interoperatability can happen within a single library, through inline code or simply C and \CFA translation units archived together. 
The fine-grained interoperatability between C and \CFA has two consequences: 43 As mentioned in Section~\ref{intro}, \CFA is binary compatible with C and, as such, must support all C library functions. Furthermore, interoperability can happen at the function-call level, inline code, or C and \CFA translation units linked together. This fine-grained interoperability between C and \CFA has two consequences: 39 44 \begin{enumerate} 40 \item Precisely identifying C calls that could block is difficult. 41 \item Introducing code where interoperatability occurs could have a significant impact on general performance. 45 \item Precisely identifying blocking C calls is difficult. 46 \item Introducing new code can have a significant impact on general performance. 42 47 \end{enumerate} 43 44 Because of these consequences, this work does not attempt to ``sandbox'' calls to C. It is possible that conflicting calls to C could lead to deadlocks on \CFA's M:N threading model where they would not in the traditionnal 1:1 threading model. However, I judge that solving this problem in general, in a way that is composable and flexible, is too complex in itself and would add too much work to this thesis. Therefore it is outside the scope of this thesis. 48 Because of these consequences, this work does not attempt to ``sandbox'' calls to C. Therefore, it is possible for an unidentified library call to block a \gls{kthrd}, leading to deadlocks in \CFA's M:N threading model, which would not occur in a traditional 1:1 threading model. Currently, all M:N thread systems interacting with UNIX without sandboxing suffer from this problem but manage to work very well in the majority of applications. Therefore, a complete solution to this problem is outside the scope of this thesis. -
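To make the blocking-I/O argument above concrete, the following standalone C sketch (not part of the changeset or the thesis) contrasts a read that would block the whole kernel thread with a non-blocking read that returns immediately with EAGAIN; the latter behaviour is what user-level I/O multiplexing builds on.

// Illustrative sketch: why asynchronous/non-blocking I/O matters for M:N threading.
// A blocking read() parks the kernel thread and every user-level thread scheduled on
// it; with O_NONBLOCK the call returns immediately with EAGAIN, so the same kernel
// thread can go run another ready user-level thread instead of stalling.
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
	int fds[2];
	if (pipe(fds) == -1) { perror("pipe"); return 1; }

	// Nothing has been written yet, so a plain read(fds[0], ...) would block here.
	fcntl(fds[0], F_SETFL, O_NONBLOCK);

	char buf[16];
	ssize_t n = read(fds[0], buf, sizeof(buf));
	if (n == -1 && (errno == EAGAIN || errno == EWOULDBLOCK))
		printf("no data yet: the runtime could now run a different thread\n");

	close(fds[0]); close(fds[1]);
	return 0;
}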
doc/theses/thierry_delisle_PhD/thesis/thesis.tex
r4468a70 r342af53 120 120 % although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and 121 121 % installation instructions there. 122 \renewcommand*{\glstextformat}[1]{\textsf{#1}} 122 123 123 124 \usepackage{csquotes} … … 125 126 126 127 % Setting up the page margins... 128 \setlength{\textheight}{9in}\setlength{\topmargin}{-0.45in}\setlength{\headsep}{0.25in} 127 129 % uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the 128 130 % top, bottom, and outside page edges and a 1.125 in. (81pt) gutter … … 191 193 % cfa macros used in the document 192 194 \input{common} 195 \CFAStyle % CFA code-style for all languages 196 \lstset{basicstyle=\linespread{0.9}\tt} 193 197 194 198 % glossary of terms to use 195 199 \input{glossary} 200 \makeindex 196 201 197 202 %====================================================================== … … 230 235 \input{text/io.tex} 231 236 \part{Evaluation} 237 \label{Evaluation} 232 238 \chapter{Theoretical and Existance Proofs} 233 239 \chapter{Micro-Benchmarks} … … 256 262 \addcontentsline{toc}{chapter}{\textbf{References}} 257 263 258 \bibliography{local }264 \bibliography{local,pl} 259 265 % Tip 5: You can create multiple .bib files to organize your references. 260 266 % Just list them all in the \bibliogaphy command, separated by commas (no spaces). … … 301 307 \printglossary 302 308 \cleardoublepage 309 310 % Index 311 % ----------------------------- 312 %\input{thesis.ind} % index 313 303 314 \phantomsection 304 315 -
example/io/simple/server_epoll.c
r4468a70 r342af53 88 88 } 89 89 90 ev.events = EPOLLIN | EPOLLONESHOT;90 ev.events = EPOLLOUT | EPOLLIN | EPOLLONESHOT; 91 91 ev.data.u64 = (uint64_t)&ring; 92 92 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev) == -1) { … … 99 99 100 100 while(1) { 101 BLOCK: 101 BLOCK:; 102 102 int nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1); 103 103 if (nfds == -1) { -
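The edit above keeps EPOLLONESHOT on the interest set. The following self-contained C sketch (illustrative only, using a pipe instead of an io_uring descriptor) shows the discipline that implies: once a one-shot event fires, the descriptor stays registered but disarmed and must be re-armed with EPOLL_CTL_MOD before epoll_wait reports it again.

// Minimal demonstration of the EPOLLONESHOT re-arm discipline; without the
// EPOLL_CTL_MOD call the second round would report zero events.
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void) {
	int fds[2];
	pipe(fds);
	int ep = epoll_create1(0);

	struct epoll_event ev = { .events = EPOLLIN | EPOLLONESHOT, .data.fd = fds[0] };
	epoll_ctl(ep, EPOLL_CTL_ADD, fds[0], &ev);

	for (int round = 0; round < 2; round += 1) {
		write(fds[1], "x", 1);                 // make the read end readable

		struct epoll_event out;
		int n = epoll_wait(ep, &out, 1, 1000);
		printf("round %d: %d event(s)\n", round, n);

		char c;
		read(fds[0], &c, 1);                   // consume the byte

		// One-shot disarmed the fd; re-arm it for the next round.
		epoll_ctl(ep, EPOLL_CTL_MOD, fds[0], &ev);
	}
	close(fds[0]); close(fds[1]); close(ep);
	return 0;
}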
libcfa/src/Makefile.am
r4468a70 r342af53 58 58 concurrency/iofwd.hfa \ 59 59 containers/list.hfa \ 60 containers/stackLockFree.hfa 60 containers/stackLockFree.hfa \ 61 vec/vec.hfa \ 62 vec/vec2.hfa \ 63 vec/vec3.hfa \ 64 vec/vec4.hfa 61 65 62 66 inst_headers_src = \ … … 94 98 concurrency/clib/cfathread.h \ 95 99 concurrency/invoke.h \ 100 concurrency/future.hfa \ 96 101 concurrency/kernel/fwd.hfa 97 102 -
libcfa/src/bits/locks.hfa
r4468a70 r342af53 283 283 void ^?{}(future_t &) {} 284 284 285 void reset(future_t & this) { 286 // needs to be in 0p or 1p 287 __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST); 288 } 289 285 290 // check if the future is available 286 291 bool available( future_t & this ) { … … 340 345 341 346 // Mark the future as abandoned, meaning it will be deleted by the server 342 void abandon( future_t & this ) { 347 bool abandon( future_t & this ) { 348 /* paranoid */ verify( this.ptr != 3p ); 349 350 // Mark the future as abandonned 343 351 struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST); 352 353 // If the future isn't already fulfilled, let the server delete it 354 if( got == 0p ) return false; 344 355 345 356 // got == 2p: the future is ready but the context hasn't fully been consumed … … 347 358 if( got == 2p ) { 348 359 while( this.ptr != 1p ) Pause(); 349 } 350 return; 360 got = 1p; 361 } 362 363 // The future is completed delete it now 364 /* paranoid */ verify( this.ptr != 1p ); 365 free( &this ); 366 return true; 351 367 } 352 368 -
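The reset/abandon changes above manipulate sentinel pointer values in future_t. The sketch below is my reading of that protocol, reduced to plain C11 atomics with made-up names: 0 means unfulfilled, 1 fulfilled and consumed, 2 fulfilled but still being consumed, 3 abandoned; abandon() now tells the caller whether it, or the server, is responsible for freeing the future.

// Sketch of the sentinel-state protocol, as inferred from the diff (not the CFA header).
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct future { _Atomic uintptr_t ptr; };   // 0, 1, 2, 3 or the address of a waiter

// Returns true when the future is already fulfilled, so the caller owns the cleanup;
// false when the server will eventually fulfill the future and free it itself.
static bool abandon(struct future * f) {
	uintptr_t got = atomic_exchange(&f->ptr, 3);   // mark as abandoned
	if (got == 0) return false;                    // unfulfilled: server cleans up
	if (got == 2)                                  // fulfilled, still being consumed:
		while (atomic_load(&f->ptr) != 1) ;        //   wait for the consumer to publish 1
	return true;                                   // fulfilled: caller cleans up
}

int main(void) {
	struct future f = { 1 };                       // pretend it was already fulfilled
	printf("caller frees: %s\n", abandon(&f) ? "yes" : "no");
	return 0;
}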
libcfa/src/concurrency/io.cfa
r4468a70 r342af53 31 31 32 32 extern "C" { 33 #include <sys/epoll.h>34 33 #include <sys/syscall.h> 35 34 … … 41 40 #include "kernel/fwd.hfa" 42 41 #include "io/types.hfa" 42 43 static const char * opcodes[] = { 44 "OP_NOP", 45 "OP_READV", 46 "OP_WRITEV", 47 "OP_FSYNC", 48 "OP_READ_FIXED", 49 "OP_WRITE_FIXED", 50 "OP_POLL_ADD", 51 "OP_POLL_REMOVE", 52 "OP_SYNC_FILE_RANGE", 53 "OP_SENDMSG", 54 "OP_RECVMSG", 55 "OP_TIMEOUT", 56 "OP_TIMEOUT_REMOVE", 57 "OP_ACCEPT", 58 "OP_ASYNC_CANCEL", 59 "OP_LINK_TIMEOUT", 60 "OP_CONNECT", 61 "OP_FALLOCATE", 62 "OP_OPENAT", 63 "OP_CLOSE", 64 "OP_FILES_UPDATE", 65 "OP_STATX", 66 "OP_READ", 67 "OP_WRITE", 68 "OP_FADVISE", 69 "OP_MADVISE", 70 "OP_SEND", 71 "OP_RECV", 72 "OP_OPENAT2", 73 "OP_EPOLL_CTL", 74 "OP_SPLICE", 75 "OP_PROVIDE_BUFFERS", 76 "OP_REMOVE_BUFFERS", 77 "OP_TEE", 78 "INVALID_OP" 79 }; 43 80 44 81 // returns true of acquired as leader or second leader … … 134 171 int ret = 0; 135 172 if( need_sys_to_submit || need_sys_to_complete ) { 173 __cfadbg_print_safe(io_core, "Kernel I/O : IO_URING enter %d %u %u\n", ring.fd, to_submit, flags); 136 174 ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, (sigset_t *)0p, _NSIG / 8); 137 175 if( ret < 0 ) { … … 157 195 static unsigned __collect_submitions( struct __io_data & ring ); 158 196 static __u32 __release_consumed_submission( struct __io_data & ring ); 159 160 static inline void process(struct io_uring_cqe & cqe ) { 197 static inline void __clean( volatile struct io_uring_sqe * sqe ); 198 199 // Process a single completion message from the io_uring 200 // This is NOT thread-safe 201 static inline void process( volatile struct io_uring_cqe & cqe ) { 161 202 struct io_future_t * future = (struct io_future_t *)(uintptr_t)cqe.user_data; 162 203 __cfadbg_print_safe( io, "Kernel I/O : Syscall completed : cqe %p, result %d for %p\n", &cqe, cqe.res, future ); … … 165 206 } 166 207 167 // Process a single completion message from the io_uring168 // This is NOT thread-safe169 208 static [int, bool] __drain_io( & struct __io_data ring ) { 170 209 /* paranoid */ verify( ! __preemption_enabled() ); … … 192 231 } 193 232 233 __atomic_thread_fence( __ATOMIC_SEQ_CST ); 234 194 235 // Release the consumed SQEs 195 236 __release_consumed_submission( ring ); … … 209 250 for(i; count) { 210 251 unsigned idx = (head + i) & mask; 211 struct io_uring_cqe & cqe = ring.completion_q.cqes[idx];252 volatile struct io_uring_cqe & cqe = ring.completion_q.cqes[idx]; 212 253 213 254 /* paranoid */ verify(&cqe); … … 218 259 // Mark to the kernel that the cqe has been seen 219 260 // Ensure that the kernel only sees the new value of the head index after the CQEs have been read. 
220 __atomic_thread_fence( __ATOMIC_SEQ_CST ); 221 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_RELAXED ); 261 __atomic_fetch_add( ring.completion_q.head, count, __ATOMIC_SEQ_CST ); 222 262 223 263 return [count, count > 0 || to_submit > 0]; … … 225 265 226 266 void main( $io_ctx_thread & this ) { 227 epoll_event ev;228 __ioctx_register( this, ev ); 229 230 __cfadbg_print_safe(io_core, "Kernel I/O : IO poller %p for ring %p ready\n", &this, &this.ring); 231 232 int reset = 0;267 __ioctx_register( this ); 268 269 __cfadbg_print_safe(io_core, "Kernel I/O : IO poller %d (%p) ready\n", this.ring->fd, &this); 270 271 const int reset_cnt = 5; 272 int reset = reset_cnt; 233 273 // Then loop until we need to start 274 LOOP: 234 275 while(!__atomic_load_n(&this.done, __ATOMIC_SEQ_CST)) { 235 276 // Drain the io … … 239 280 [count, again] = __drain_io( *this.ring ); 240 281 241 if(!again) reset ++;282 if(!again) reset--; 242 283 243 284 // Update statistics … … 249 290 250 291 // If we got something, just yield and check again 251 if(reset < 5) {292 if(reset > 1) { 252 293 yield(); 253 } 254 // We didn't get anything baton pass to the slow poller 255 else { 294 continue LOOP; 295 } 296 297 // We alread failed to find completed entries a few time. 298 if(reset == 1) { 299 // Rearm the context so it can block 300 // but don't block right away 301 // we need to retry one last time in case 302 // something completed *just now* 303 __ioctx_prepare_block( this ); 304 continue LOOP; 305 } 306 256 307 __STATS__( false, 257 308 io.complete_q.blocks += 1; 258 309 ) 259 __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %p\n", &this.self); 260 reset = 0; 310 __cfadbg_print_safe(io_core, "Kernel I/O : Parking io poller %d (%p)\n", this.ring->fd, &this); 261 311 262 312 // block this thread 263 __ioctx_prepare_block( this, ev );264 313 wait( this.sem ); 265 } 266 } 267 268 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller for ring %p stopping\n", &this.ring); 314 315 // restore counter 316 reset = reset_cnt; 317 } 318 319 __cfadbg_print_safe(io_core, "Kernel I/O : Fast poller %d (%p) stopping\n", this.ring->fd, &this); 269 320 } 270 321 … … 289 340 // 290 341 291 [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) { 342 // Allocate an submit queue entry. 
343 // The kernel cannot see these entries until they are submitted, but other threads must be 344 // able to see which entries can be used and which are already un used by an other thread 345 // for convenience, return both the index and the pointer to the sqe 346 // sqe == &sqes[idx] 347 [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ) { 292 348 /* paranoid */ verify( data != 0 ); 293 349 … … 304 360 // Look through the list starting at some offset 305 361 for(i; cnt) { 306 __u64 expected = 0;307 __u32 idx = (i + off) & mask; 308 struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx];362 __u64 expected = 3; 363 __u32 idx = (i + off) & mask; // Get an index from a random 364 volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx]; 309 365 volatile __u64 * udata = &sqe->user_data; 310 366 367 // Allocate the entry by CASing the user_data field from 0 to the future address 311 368 if( *udata == expected && 312 369 __atomic_compare_exchange_n( udata, &expected, data, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED ) ) … … 319 376 ) 320 377 378 // debug log 379 __cfadbg_print_safe( io, "Kernel I/O : allocated [%p, %u] for %p (%p)\n", sqe, idx, active_thread(), (void*)data ); 321 380 322 381 // Success return the data … … 325 384 verify(expected != data); 326 385 386 // This one was used 327 387 len ++; 328 388 } 329 389 330 390 block++; 391 392 abort( "Kernel I/O : all submit queue entries used, yielding\n" ); 393 331 394 yield(); 332 395 } … … 377 440 void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))) { 378 441 __io_data & ring = *ctx->thrd.ring; 442 443 { 444 __attribute__((unused)) volatile struct io_uring_sqe * sqe = &ring.submit_q.sqes[idx]; 445 __cfadbg_print_safe( io, 446 "Kernel I/O : submitting %u (%p) for %p\n" 447 " data: %p\n" 448 " opcode: %s\n" 449 " fd: %d\n" 450 " flags: %d\n" 451 " prio: %d\n" 452 " off: %p\n" 453 " addr: %p\n" 454 " len: %d\n" 455 " other flags: %d\n" 456 " splice fd: %d\n" 457 " pad[0]: %llu\n" 458 " pad[1]: %llu\n" 459 " pad[2]: %llu\n", 460 idx, sqe, 461 active_thread(), 462 (void*)sqe->user_data, 463 opcodes[sqe->opcode], 464 sqe->fd, 465 sqe->flags, 466 sqe->ioprio, 467 sqe->off, 468 sqe->addr, 469 sqe->len, 470 sqe->accept_flags, 471 sqe->splice_fd_in, 472 sqe->__pad2[0], 473 sqe->__pad2[1], 474 sqe->__pad2[2] 475 ); 476 } 477 478 379 479 // Get now the data we definetely need 380 480 volatile __u32 * const tail = ring.submit_q.tail; … … 443 543 unlock(ring.submit_q.submit_lock); 444 544 #endif 445 if( ret < 0 ) return; 545 if( ret < 0 ) { 546 return; 547 } 446 548 447 549 // Release the consumed SQEs … … 454 556 io.submit_q.submit_avg.cnt += 1; 455 557 ) 456 } 457 else { 558 559 __cfadbg_print_safe( io, "Kernel I/O : submitted %u (among %u) for %p\n", idx, ret, active_thread() ); 560 } 561 else 562 { 458 563 // get mutual exclusion 459 564 #if defined(LEADER_LOCK) … … 463 568 #endif 464 569 465 /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 0,570 /* paranoid */ verifyf( ring.submit_q.sqes[ idx ].user_data != 3ul64, 466 571 /* paranoid */ "index %u already reclaimed\n" 467 572 /* paranoid */ "head %u, prev %u, tail %u\n" … … 490 595 } 491 596 597 /* paranoid */ verify(ret == 1); 598 492 599 // update statistics 493 600 __STATS__( false, … … 496 603 ) 497 604 605 { 606 __attribute__((unused)) volatile __u32 * const head = ring.submit_q.head; 607 __attribute__((unused)) __u32 last_idx = ring.submit_q.array[ ((*head) - 1) & mask ]; 608 __attribute__((unused)) volatile 
struct io_uring_sqe * sqe = &ring.submit_q.sqes[last_idx]; 609 610 __cfadbg_print_safe( io, 611 "Kernel I/O : last submitted is %u (%p)\n" 612 " data: %p\n" 613 " opcode: %s\n" 614 " fd: %d\n" 615 " flags: %d\n" 616 " prio: %d\n" 617 " off: %p\n" 618 " addr: %p\n" 619 " len: %d\n" 620 " other flags: %d\n" 621 " splice fd: %d\n" 622 " pad[0]: %llu\n" 623 " pad[1]: %llu\n" 624 " pad[2]: %llu\n", 625 last_idx, sqe, 626 (void*)sqe->user_data, 627 opcodes[sqe->opcode], 628 sqe->fd, 629 sqe->flags, 630 sqe->ioprio, 631 sqe->off, 632 sqe->addr, 633 sqe->len, 634 sqe->accept_flags, 635 sqe->splice_fd_in, 636 sqe->__pad2[0], 637 sqe->__pad2[1], 638 sqe->__pad2[2] 639 ); 640 } 641 642 __atomic_thread_fence( __ATOMIC_SEQ_CST ); 498 643 // Release the consumed SQEs 499 644 __release_consumed_submission( ring ); 645 // ring.submit_q.sqes[idx].user_data = 3ul64; 500 646 501 647 #if defined(LEADER_LOCK) … … 505 651 #endif 506 652 507 __cfadbg_print_safe( io, "Kernel I/O : Performed io_submit for %p, returned %d\n", active_thread(), ret);653 __cfadbg_print_safe( io, "Kernel I/O : submitted %u for %p\n", idx, active_thread() ); 508 654 } 509 655 } 510 656 511 657 // #define PARTIAL_SUBMIT 32 658 659 // go through the list of submissions in the ready array and moved them into 660 // the ring's submit queue 512 661 static unsigned __collect_submitions( struct __io_data & ring ) { 513 662 /* paranoid */ verify( ring.submit_q.ready != 0p ); … … 550 699 } 551 700 701 // Go through the ring's submit queue and release everything that has already been consumed 702 // by io_uring 552 703 static __u32 __release_consumed_submission( struct __io_data & ring ) { 553 704 const __u32 smask = *ring.submit_q.mask; 554 705 706 // We need to get the lock to copy the old head and new head 555 707 if( !try_lock(ring.submit_q.release_lock __cfaabi_dbg_ctx2) ) return 0; 556 __u32 chead = *ring.submit_q.head; 557 __u32 phead = ring.submit_q.prev_head; 558 ring.submit_q.prev_head = chead; 708 __attribute__((unused)) 709 __u32 ctail = *ring.submit_q.tail; // get the current tail of the queue 710 __u32 chead = *ring.submit_q.head; // get the current head of the queue 711 __u32 phead = ring.submit_q.prev_head; // get the head the last time we were here 712 ring.submit_q.prev_head = chead; // note up to were we processed 559 713 unlock(ring.submit_q.release_lock); 560 714 715 // the 3 fields are organized like this diagram 716 // except it's are ring 717 // ---+--------+--------+---- 718 // ---+--------+--------+---- 719 // ^ ^ ^ 720 // phead chead ctail 721 722 // make sure ctail doesn't wrap around and reach phead 723 /* paranoid */ verify( 724 (ctail >= chead && chead >= phead) 725 || (chead >= phead && phead >= ctail) 726 || (phead >= ctail && ctail >= chead) 727 ); 728 729 // find the range we need to clear 561 730 __u32 count = chead - phead; 731 732 // We acquired an previous-head/current-head range 733 // go through the range and release the sqes 562 734 for( i; count ) { 563 735 __u32 idx = ring.submit_q.array[ (phead + i) & smask ]; 564 ring.submit_q.sqes[ idx ].user_data = 0; 736 737 /* paranoid */ verify( 0 != ring.submit_q.sqes[ idx ].user_data ); 738 __clean( &ring.submit_q.sqes[ idx ] ); 565 739 } 566 740 return count; 567 741 } 742 743 void __sqe_clean( volatile struct io_uring_sqe * sqe ) { 744 __clean( sqe ); 745 } 746 747 static inline void __clean( volatile struct io_uring_sqe * sqe ) { 748 // If we are in debug mode, thrash the fields to make sure we catch reclamation errors 749 __cfaabi_dbg_debug_do( 750 
memset(sqe, 0xde, sizeof(*sqe)); 751 sqe->opcode = (sizeof(opcodes) / sizeof(const char *)) - 1; 752 ); 753 754 // Mark the entry as unused 755 __atomic_store_n(&sqe->user_data, 3ul64, __ATOMIC_SEQ_CST); 756 } 568 757 #endif -
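A recurring change in io.cfa above is that a free submit-queue entry is now marked by the sentinel 3 in its user_data field (0, 1 and 2 being reserved by the future protocol) and claimed with a compare-and-swap. The following generic C sketch, with hypothetical names, shows that allocation pattern in isolation.

// Claiming a ring entry by CASing its user_data field from a "free" sentinel to the
// address of the waiting future; losing the race simply moves on to the next entry.
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FREE_SENTINEL 3ull
#define NUM_SLOTS     8

static _Atomic uint64_t user_data[NUM_SLOTS];

// Try to claim a slot for `owner`, starting the search at `start`;
// returns the slot index, or -1 if every entry is in use.
static int claim_slot(uint64_t owner, uint32_t start) {
	for (uint32_t i = 0; i < NUM_SLOTS; i += 1) {
		uint32_t idx = (start + i) % NUM_SLOTS;
		uint64_t expected = FREE_SENTINEL;
		if (atomic_compare_exchange_strong(&user_data[idx], &expected, owner))
			return (int)idx;                     // won the race for this entry
	}
	return -1;
}

int main(void) {
	for (int i = 0; i < NUM_SLOTS; i += 1) user_data[i] = FREE_SENTINEL;
	uint64_t future_addr = 0xdeadbeef;           // stand-in for the future's address
	printf("claimed slot %d\n", claim_slot(future_addr, 5));
	return 0;
}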
libcfa/src/concurrency/io/call.cfa.in
r4468a70 r342af53 74 74 ; 75 75 76 extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );76 extern [* volatile struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data ); 77 77 extern void __submit( struct io_context * ctx, __u32 idx ) __attribute__((nonnull (1))); 78 78 … … 222 222 __u32 idx; 223 223 struct io_uring_sqe * sqe; 224 [sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future ); 225 226 sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; 224 [(volatile struct io_uring_sqe *) sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future ); 225 227 226 sqe->opcode = IORING_OP_{op}; 228 sqe->flags = sflags;{body} 227 sqe->flags = sflags; 228 sqe->ioprio = 0; 229 sqe->fd = 0; 230 sqe->off = 0; 231 sqe->addr = 0; 232 sqe->len = 0; 233 sqe->fsync_flags = 0; 234 sqe->__pad2[0] = 0; 235 sqe->__pad2[1] = 0; 236 sqe->__pad2[2] = 0;{body} 237 238 asm volatile("": : :"memory"); 229 239 230 240 verify( sqe->user_data == (__u64)(uintptr_t)&future ); … … 312 322 }), 313 323 # CFA_HAVE_IORING_OP_ACCEPT 314 Call('ACCEPT 4', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', {324 Call('ACCEPT', 'int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)', { 315 325 'fd': 'sockfd', 316 'addr': ' addr',317 'addr2': ' addrlen',326 'addr': '(__u64)addr', 327 'addr2': '(__u64)addrlen', 318 328 'accept_flags': 'flags' 319 329 }), … … 464 474 465 475 print(""" 476 //----------------------------------------------------------------------------- 477 bool cancel(io_cancellation & this) { 478 #if !defined(CFA_HAVE_LINUX_IO_URING_H) || !defined(CFA_HAVE_IORING_OP_ASYNC_CANCEL) 479 return false; 480 #else 481 io_future_t future; 482 483 io_context * context = __get_io_context(); 484 485 __u8 sflags = 0; 486 struct __io_data & ring = *context->thrd.ring; 487 488 __u32 idx; 489 volatile struct io_uring_sqe * sqe; 490 [sqe, idx] = __submit_alloc( ring, (__u64)(uintptr_t)&future ); 491 492 sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; 493 sqe->opcode = IORING_OP_ASYNC_CANCEL; 494 sqe->flags = sflags; 495 sqe->addr = this.target; 496 497 verify( sqe->user_data == (__u64)(uintptr_t)&future ); 498 __submit( context, idx ); 499 500 wait(future); 501 502 if( future.result == 0 ) return true; // Entry found 503 if( future.result == -EALREADY) return true; // Entry found but in progress 504 if( future.result == -ENOENT ) return false; // Entry not found 505 return false; 506 #endif 507 } 508 466 509 //----------------------------------------------------------------------------- 467 510 // Check if a function is has asynchronous -
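The generated cancel() above distinguishes three io_uring results. A small C helper (illustrative, not generated code) makes that mapping explicit.

// Interpretation of IORING_OP_ASYNC_CANCEL results, following the codes in the diff:
// 0 means the request was found and cancelled, -EALREADY means it was found but is
// already executing, -ENOENT means no matching request exists.
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool cancel_found(int res) {
	if (res == 0)         return true;   // entry found and cancelled
	if (res == -EALREADY) return true;   // entry found but already in progress
	if (res == -ENOENT)   return false;  // entry not found
	return false;                        // any other error: treat as not cancelled
}

int main(void) {
	printf("%d %d %d\n", cancel_found(0), cancel_found(-EALREADY), cancel_found(-ENOENT));
	return 0;
}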
libcfa/src/concurrency/io/setup.cfa
r4468a70 r342af53 52 52 #include <pthread.h> 53 53 #include <sys/epoll.h> 54 #include <sys/eventfd.h> 54 55 #include <sys/mman.h> 55 56 #include <sys/syscall.h> … … 169 170 // Main loop 170 171 while( iopoll.run ) { 172 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : waiting on io_uring contexts\n"); 173 171 174 // Wait for events 172 175 int nfds = epoll_pwait( iopoll.epollfd, events, 10, -1, &mask ); 176 177 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : %d io contexts events, waking up\n", nfds); 173 178 174 179 // Check if an error occured … … 181 186 $io_ctx_thread * io_ctx = ($io_ctx_thread *)(uintptr_t)events[i].data.u64; 182 187 /* paranoid */ verify( io_ctx ); 183 __cfadbg_print_safe(io_core, "Kernel I/O : Unparking io poller %p\n", io_ctx);188 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Unparking io poller %d (%p)\n", io_ctx->ring->fd, io_ctx); 184 189 #if !defined( __CFA_NO_STATISTICS__ ) 185 190 __cfaabi_tls.this_stats = io_ctx->self.curr_cluster->stats; 186 191 #endif 192 193 eventfd_t v; 194 eventfd_read(io_ctx->ring->efd, &v); 195 187 196 post( io_ctx->sem ); 188 197 } … … 233 242 $thread & thrd = this.thrd.self; 234 243 if( cluster_context ) { 244 // We are about to do weird things with the threads 245 // we don't need interrupts to complicate everything 246 disable_interrupts(); 247 248 // Get cluster info 235 249 cluster & cltr = *thrd.curr_cluster; 236 250 /* paranoid */ verify( cltr.idles.total == 0 || &cltr == mainCluster ); … … 239 253 // We need to adjust the clean-up based on where the thread is 240 254 if( thrd.state == Ready || thrd.preempted != __NO_PREEMPTION ) { 255 // This is the tricky case 256 // The thread was preempted or ready to run and now it is on the ready queue 257 // but the cluster is shutting down, so there aren't any processors to run the ready queue 258 // the solution is to steal the thread from the ready-queue and pretend it was blocked all along 241 259 242 260 ready_schedule_lock(); 243 244 // This is the tricky case 245 // The thread was preempted and now it is on the ready queue 261 // The thread should on the list 262 /* paranoid */ verify( thrd.link.next != 0p ); 263 264 // Remove the thread from the ready queue of this cluster 246 265 // The thread should be the last on the list 247 /* paranoid */ verify( thrd.link.next != 0p );248 249 // Remove the thread from the ready queue of this cluster250 266 __attribute__((unused)) bool removed = remove_head( &cltr, &thrd ); 251 267 /* paranoid */ verify( removed ); … … 263 279 } 264 280 // !!! This is not an else if !!! 281 // Ok, now the thread is blocked (whether we cheated to get here or not) 265 282 if( thrd.state == Blocked ) { 266 267 283 // This is the "easy case" 268 284 // The thread is parked and can easily be moved to active cluster … … 274 290 } 275 291 else { 276 277 292 // The thread is in a weird state 278 293 // I don't know what to do here 279 294 abort("io_context poller thread is in unexpected state, cannot clean-up correctly\n"); 280 295 } 296 297 // The weird thread kidnapping stuff is over, restore interrupts. 
298 enable_interrupts( __cfaabi_dbg_ctx ); 281 299 } else { 282 300 post( this.thrd.sem ); … … 365 383 } 366 384 385 // Step 3 : Initialize the data structure 367 386 // Get the pointers from the kernel to fill the structure 368 387 // submit queue … … 379 398 const __u32 num = *sq.num; 380 399 for( i; num ) { 381 sq.sqes[i].user_data = 0ul64;400 __sqe_clean( &sq.sqes[i] ); 382 401 } 383 402 } … … 409 428 cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes); 410 429 430 // Step 4 : eventfd 431 int efd; 432 for() { 433 efd = eventfd(0, 0); 434 if (efd < 0) { 435 if (errno == EINTR) continue; 436 abort("KERNEL ERROR: IO_URING EVENTFD - %s\n", strerror(errno)); 437 } 438 break; 439 } 440 441 int ret; 442 for() { 443 ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &efd, 1); 444 if (ret < 0) { 445 if (errno == EINTR) continue; 446 abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno)); 447 } 448 break; 449 } 450 411 451 // some paranoid checks 412 452 /* paranoid */ verifyf( (*cq.mask) == ((*cq.num) - 1ul32), "IO_URING Expected mask to be %u (%u entries), was %u", (*cq.num) - 1ul32, *cq.num, *cq.mask ); … … 423 463 this.ring_flags = params.flags; 424 464 this.fd = fd; 465 this.efd = efd; 425 466 this.eager_submits = params_in.eager_submits; 426 467 this.poller_submits = params_in.poller_submits; … … 445 486 // close the file descriptor 446 487 close(this.fd); 488 close(this.efd); 447 489 448 490 free( this.submit_q.ready ); // Maybe null, doesn't matter … … 452 494 // I/O Context Sleep 453 495 //============================================================================================= 454 455 void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev) { 456 ev.events = EPOLLIN | EPOLLONESHOT; 496 #define IOEVENTS EPOLLIN | EPOLLONESHOT 497 498 static inline void __ioctx_epoll_ctl($io_ctx_thread & ctx, int op, const char * error) { 499 struct epoll_event ev; 500 ev.events = IOEVENTS; 457 501 ev.data.u64 = (__u64)&ctx; 458 int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_ADD, ctx.ring->fd, &ev);502 int ret = epoll_ctl(iopoll.epollfd, op, ctx.ring->efd, &ev); 459 503 if (ret < 0) { 460 abort( "KERNEL ERROR: EPOLL ADD - (%d) %s\n", (int)errno, strerror(errno) ); 461 } 462 } 463 464 void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev) { 465 int ret = epoll_ctl(iopoll.epollfd, EPOLL_CTL_MOD, ctx.ring->fd, &ev); 466 if (ret < 0) { 467 abort( "KERNEL ERROR: EPOLL REARM - (%d) %s\n", (int)errno, strerror(errno) ); 468 } 504 abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) ); 505 } 506 } 507 508 void __ioctx_register($io_ctx_thread & ctx) { 509 __ioctx_epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD"); 510 } 511 512 void __ioctx_prepare_block($io_ctx_thread & ctx) { 513 __cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.ring->fd, &ctx); 514 __ioctx_epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM"); 469 515 } 470 516 -
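The setup changes wire completions to an eventfd that is registered with io_uring (IORING_REGISTER_EVENTFD) and watched through epoll. The standalone C sketch below reproduces just the user-space half of that wake-up path; the eventfd_write() stands in for the kernel bumping the counter on a completion.

// Eventfd-based wake-up: watch the eventfd with EPOLLIN | EPOLLONESHOT and drain the
// counter with eventfd_read() before waking the io context, as the poller above does.
#include <stdint.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void) {
	int efd = eventfd(0, 0);
	int ep  = epoll_create1(0);

	struct epoll_event ev = { .events = EPOLLIN | EPOLLONESHOT, .data.fd = efd };
	epoll_ctl(ep, EPOLL_CTL_ADD, efd, &ev);

	eventfd_write(efd, 1);                    // pretend a completion arrived

	struct epoll_event out;
	int n = epoll_wait(ep, &out, 1, 1000);

	eventfd_t v;
	eventfd_read(efd, &v);                    // drain the counter before waking anyone
	printf("events=%d counter=%llu\n", n, (unsigned long long)v);

	close(efd); close(ep);
	return 0;
}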
libcfa/src/concurrency/io/types.hfa
r4468a70 r342af53 65 65 66 66 // A buffer of sqes (not the actual ring) 67 struct io_uring_sqe * sqes;67 volatile struct io_uring_sqe * sqes; 68 68 69 69 // The location and size of the mmaped area … … 85 85 86 86 // the kernel ring 87 struct io_uring_cqe * cqes;87 volatile struct io_uring_cqe * cqes; 88 88 89 89 // The location and size of the mmaped area … … 97 97 __u32 ring_flags; 98 98 int fd; 99 int efd; 99 100 bool eager_submits:1; 100 101 bool poller_submits:1; … … 130 131 #endif 131 132 132 struct epoll_event;133 133 struct $io_ctx_thread; 134 void __ioctx_register($io_ctx_thread & ctx, struct epoll_event & ev); 135 void __ioctx_prepare_block($io_ctx_thread & ctx, struct epoll_event & ev); 134 void __ioctx_register($io_ctx_thread & ctx); 135 void __ioctx_prepare_block($io_ctx_thread & ctx); 136 void __sqe_clean( volatile struct io_uring_sqe * sqe ); 136 137 #endif 137 138 -
libcfa/src/concurrency/monitor.hfa
r4468a70 r342af53 132 132 133 133 void wait ( condition & this, uintptr_t user_info = 0 ); 134 static inline bool is_empty ( condition & this ) { return this.blocked.head == 1p; } 134 135 bool signal ( condition & this ); 135 136 bool signal_block( condition & this ); 136 static inline bool is_empty ( condition & this ) { return this.blocked.head == 1p; }137 static inline bool signal_all ( condition & this ) { bool ret = false; while(!is_empty(this)) { ret = signal(this) || ret; } return ret; } 137 138 uintptr_t front ( condition & this ); 138 139 -
libcfa/src/heap.cfa
r4468a70 r342af53 10 10 // Created On : Tue Dec 19 21:58:35 2017 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Dec 16 12:28:25 202013 // Update Count : 10 2312 // Last Modified On : Sun Jan 10 11:20:49 2021 13 // Update Count : 1031 14 14 // 15 15 … … 262 262 #ifdef __STATISTICS__ 263 263 // Heap statistics counters. 264 static unsigned int malloc_ calls;264 static unsigned int malloc_zero_calls, malloc_calls; 265 265 static unsigned long long int malloc_storage; 266 static unsigned int aalloc_ calls;266 static unsigned int aalloc_zero_calls, aalloc_calls; 267 267 static unsigned long long int aalloc_storage; 268 static unsigned int calloc_ calls;268 static unsigned int calloc_zero_calls, calloc_calls; 269 269 static unsigned long long int calloc_storage; 270 static unsigned int memalign_ calls;270 static unsigned int memalign_zero_calls, memalign_calls; 271 271 static unsigned long long int memalign_storage; 272 static unsigned int amemalign_ calls;272 static unsigned int amemalign_zero_calls, amemalign_calls; 273 273 static unsigned long long int amemalign_storage; 274 static unsigned int cmemalign_ calls;274 static unsigned int cmemalign_zero_calls, cmemalign_calls; 275 275 static unsigned long long int cmemalign_storage; 276 static unsigned int resize_ calls;276 static unsigned int resize_zero_calls, resize_calls; 277 277 static unsigned long long int resize_storage; 278 static unsigned int realloc_ calls;278 static unsigned int realloc_zero_calls, realloc_calls; 279 279 static unsigned long long int realloc_storage; 280 static unsigned int free_ calls;280 static unsigned int free_zero_calls, free_calls; 281 281 static unsigned long long int free_storage; 282 282 static unsigned int mmap_calls; … … 287 287 static unsigned long long int sbrk_storage; 288 288 // Statistics file descriptor (changed by malloc_stats_fd). 289 static int stat _fd = STDERR_FILENO;// default stderr289 static int stats_fd = STDERR_FILENO; // default stderr 290 290 291 291 // Use "write" because streams may be shutdown when calls are made. 
… … 293 293 char helpText[1024]; 294 294 __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), 295 296 " malloc: calls %u / storage %llu\n"297 " aalloc: calls %u / storage %llu\n"298 " calloc: calls %u / storage %llu\n"299 " memalign: calls %u / storage %llu\n"300 " amemalign: calls %u / storage %llu\n"301 " cmemalign: calls %u / storage %llu\n"302 " resize: calls %u / storage %llu\n"303 " realloc: calls %u / storage %llu\n"304 " free: calls %u / storage %llu\n"305 " mmap: calls %u / storage %llu\n"306 " munmap: calls %u / storage %llu\n"307 " sbrk: calls %u / storage %llu\n",308 309 310 311 312 313 314 315 316 317 318 319 295 "\nHeap statistics:\n" 296 " malloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 297 " aalloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 298 " calloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 299 " memalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 300 " amemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 301 " cmemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 302 " resize 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 303 " realloc 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 304 " free 0-calls %'u; >0-calls %'u; storage %'llu bytes\n" 305 " mmap calls %'u; storage %'llu bytes\n" 306 " munmap calls %'u; storage %'llu bytes\n" 307 " sbrk calls %'u; storage %'llu bytes\n", 308 malloc_zero_calls, malloc_calls, malloc_storage, 309 aalloc_zero_calls, aalloc_calls, aalloc_storage, 310 calloc_zero_calls, calloc_calls, calloc_storage, 311 memalign_zero_calls, memalign_calls, memalign_storage, 312 amemalign_zero_calls, amemalign_calls, amemalign_storage, 313 cmemalign_zero_calls, cmemalign_calls, cmemalign_storage, 314 resize_zero_calls, resize_calls, resize_storage, 315 realloc_zero_calls, realloc_calls, realloc_storage, 316 free_zero_calls, free_calls, free_storage, 317 mmap_calls, mmap_storage, 318 munmap_calls, munmap_storage, 319 sbrk_calls, sbrk_storage 320 320 ); 321 321 } // printStats … … 328 328 "<sizes>\n" 329 329 "</sizes>\n" 330 "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"331 "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n"332 "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"333 "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"334 "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n"335 "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"336 "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n"337 "<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"338 "<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"339 "<total type=\"mmap\" count=\"% u\" size=\"%llu\"/>\n"340 "<total type=\"munmap\" count=\"% u\" size=\"%llu\"/>\n"341 "<total type=\"sbrk\" count=\"% u\" size=\"%llu\"/>\n"330 "<total type=\"malloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 331 "<total type=\"aalloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 332 "<total type=\"calloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 333 "<total type=\"memalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 334 "<total type=\"amemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 335 "<total type=\"cmemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 336 "<total type=\"resize\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 337 "<total type=\"realloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n" 338 "<total type=\"free\" 0 count=\"%'u;\" >0 count=\"%'u;\" 
size=\"%'llu\"/> bytes\n" 339 "<total type=\"mmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 340 "<total type=\"munmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 341 "<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" 342 342 "</malloc>", 343 malloc_ calls, malloc_storage,344 aalloc_ calls, aalloc_storage,345 calloc_ calls, calloc_storage,346 memalign_ calls, memalign_storage,347 amemalign_ calls, amemalign_storage,348 cmemalign_ calls, cmemalign_storage,349 resize_ calls, resize_storage,350 realloc_ calls, realloc_storage,351 free_ calls, free_storage,343 malloc_zero_calls, malloc_calls, malloc_storage, 344 aalloc_zero_calls, aalloc_calls, aalloc_storage, 345 calloc_zero_calls, calloc_calls, calloc_storage, 346 memalign_zero_calls, memalign_calls, memalign_storage, 347 amemalign_zero_calls, amemalign_calls, amemalign_storage, 348 cmemalign_zero_calls, cmemalign_calls, cmemalign_storage, 349 resize_zero_calls, resize_calls, resize_storage, 350 realloc_zero_calls, realloc_calls, realloc_storage, 351 free_zero_calls, free_calls, free_storage, 352 352 mmap_calls, mmap_storage, 353 353 munmap_calls, munmap_storage, … … 466 466 } // headers 467 467 468 #ifdef __CFA_DEBUG__469 #if __SIZEOF_POINTER__ == 4470 #define MASK 0xdeadbeef471 #else472 #define MASK 0xdeadbeefdeadbeef473 #endif474 #define STRIDE size_t475 476 static void * Memset( void * addr, STRIDE size ) { // debug only477 if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) );478 if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) );479 480 STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE);481 for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK;482 return addr;483 } // Memset484 #endif // __CFA_DEBUG__468 // #ifdef __CFA_DEBUG__ 469 // #if __SIZEOF_POINTER__ == 4 470 // #define MASK 0xdeadbeef 471 // #else 472 // #define MASK 0xdeadbeefdeadbeef 473 // #endif 474 // #define STRIDE size_t 475 476 // static void * Memset( void * addr, STRIDE size ) { // debug only 477 // if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) ); 478 // if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) ); 479 480 // STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE); 481 // for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK; 482 // return addr; 483 // } // Memset 484 // #endif // __CFA_DEBUG__ 485 485 486 486 … … 498 498 unlock( extlock ); 499 499 __cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size ); 500 _exit( EXIT_FAILURE ); 501 } // if 500 _exit( EXIT_FAILURE ); // give up 501 } // if 502 // Make storage executable for thunks. 
502 503 if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) { 503 504 unlock( extlock ); … … 770 771 771 772 772 static inline void * callocNoStats( size_t dim, size_t elemSize ) {773 size_t size = dim * elemSize;774 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER775 char * addr = (char *)mallocNoStats( size );776 777 HeapManager.Storage.Header * header;778 HeapManager.FreeHeader * freeElem;779 size_t bsize, alignment;780 #ifndef __CFA_DEBUG__781 bool mapped =782 #endif // __CFA_DEBUG__783 headers( "calloc", addr, header, freeElem, bsize, alignment );784 #ifndef __CFA_DEBUG__785 786 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.787 if ( ! mapped )788 #endif // __CFA_DEBUG__789 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined790 // `-header`-addr `-size791 memset( addr, '\0', size ); // set to zeros792 793 header->kind.real.blockSize |= 2; // mark as zero filled794 return addr;795 } // callocNoStats796 797 798 773 static inline void * memalignNoStats( size_t alignment, size_t size ) { 799 774 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER … … 834 809 835 810 836 static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) {837 size_t size = dim * elemSize;838 if ( unlikely( size ) == 0 ) return 0p; // 0 BYTE ALLOCATION RETURNS NULL POINTER839 char * addr = (char *)memalignNoStats( alignment, size );840 841 HeapManager.Storage.Header * header;842 HeapManager.FreeHeader * freeElem;843 size_t bsize;844 #ifndef __CFA_DEBUG__845 bool mapped =846 #endif // __CFA_DEBUG__847 headers( "cmemalign", addr, header, freeElem, bsize, alignment );848 849 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.850 #ifndef __CFA_DEBUG__851 if ( ! mapped )852 #endif // __CFA_DEBUG__853 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined854 // `-header`-addr `-size855 memset( addr, '\0', size ); // set to zeros856 857 header->kind.real.blockSize |= 2; // mark as zero filled858 return addr;859 } // cmemalignNoStats860 861 862 811 extern "C" { 863 812 // Allocates size bytes and returns a pointer to the allocated memory. The contents are undefined. If size is 0, … … 865 814 void * malloc( size_t size ) { 866 815 #ifdef __STATISTICS__ 867 __atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST ); 868 __atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST ); 816 if ( likely( size > 0 ) ) { 817 __atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST ); 818 __atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST ); 819 } else { 820 __atomic_add_fetch( &malloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 821 } // if 869 822 #endif // __STATISTICS__ 870 823 … … 877 830 size_t size = dim * elemSize; 878 831 #ifdef __STATISTICS__ 879 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 880 __atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST ); 832 if ( likely( size > 0 ) ) { 833 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST ); 834 __atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST ); 835 } else { 836 __atomic_add_fetch( &aalloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 837 } // if 881 838 #endif // __STATISTICS__ 882 839 … … 887 844 // Same as aalloc() with memory set to zero. 
888 845 void * calloc( size_t dim, size_t elemSize ) { 846 size_t size = dim * elemSize; 847 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 848 #ifdef __STATISTICS__ 849 __atomic_add_fetch( &calloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 850 #endif // __STATISTICS__ 851 return 0p; 852 } // if 889 853 #ifdef __STATISTICS__ 890 854 __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST ); … … 892 856 #endif // __STATISTICS__ 893 857 894 return callocNoStats( dim, elemSize ); 858 char * addr = (char *)mallocNoStats( size ); 859 860 HeapManager.Storage.Header * header; 861 HeapManager.FreeHeader * freeElem; 862 size_t bsize, alignment; 863 864 #ifndef __CFA_DEBUG__ 865 bool mapped = 866 #endif // __CFA_DEBUG__ 867 headers( "calloc", addr, header, freeElem, bsize, alignment ); 868 869 #ifndef __CFA_DEBUG__ 870 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 871 if ( ! mapped ) 872 #endif // __CFA_DEBUG__ 873 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined 874 // `-header`-addr `-size 875 memset( addr, '\0', size ); // set to zeros 876 877 header->kind.real.blockSize |= 2; // mark as zero filled 878 return addr; 895 879 } // calloc 896 880 … … 901 885 // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done. 902 886 void * resize( void * oaddr, size_t size ) { 887 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 888 if ( unlikely( size == 0 ) ) { // special cases 889 #ifdef __STATISTICS__ 890 __atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST ); 891 #endif // __STATISTICS__ 892 free( oaddr ); 893 return 0p; 894 } // if 903 895 #ifdef __STATISTICS__ 904 896 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 905 897 #endif // __STATISTICS__ 906 898 907 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.908 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases909 899 if ( unlikely( oaddr == 0p ) ) { 910 900 #ifdef __STATISTICS__ … … 918 908 size_t bsize, oalign; 919 909 headers( "resize", oaddr, header, freeElem, bsize, oalign ); 910 920 911 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket 921 922 912 // same size, DO NOT preserve STICKY PROPERTIES. 923 913 if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size … … 940 930 // the old and new sizes. 941 931 void * realloc( void * oaddr, size_t size ) { 932 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 
933 if ( unlikely( size == 0 ) ) { // special cases 934 #ifdef __STATISTICS__ 935 __atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 936 #endif // __STATISTICS__ 937 free( oaddr ); 938 return 0p; 939 } // if 942 940 #ifdef __STATISTICS__ 943 941 __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST ); 944 942 #endif // __STATISTICS__ 945 943 946 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.947 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases948 944 if ( unlikely( oaddr == 0p ) ) { 949 945 #ifdef __STATISTICS__ … … 999 995 void * memalign( size_t alignment, size_t size ) { 1000 996 #ifdef __STATISTICS__ 1001 __atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST ); 1002 __atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST ); 997 if ( likely( size > 0 ) ) { 998 __atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST ); 999 __atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST ); 1000 } else { 1001 __atomic_add_fetch( &memalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1002 } // if 1003 1003 #endif // __STATISTICS__ 1004 1004 … … 1011 1011 size_t size = dim * elemSize; 1012 1012 #ifdef __STATISTICS__ 1013 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1014 __atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST ); 1013 if ( likely( size > 0 ) ) { 1014 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); 1015 __atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST ); 1016 } else { 1017 __atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1018 } // if 1015 1019 #endif // __STATISTICS__ 1016 1020 … … 1021 1025 // Same as calloc() with memory alignment. 1022 1026 void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) { 1027 size_t size = dim * elemSize; 1028 if ( unlikely( size ) == 0 ) { // 0 BYTE ALLOCATION RETURNS NULL POINTER 1029 #ifdef __STATISTICS__ 1030 __atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST ); 1031 #endif // __STATISTICS__ 1032 return 0p; 1033 } // if 1023 1034 #ifdef __STATISTICS__ 1024 1035 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST ); … … 1026 1037 #endif // __STATISTICS__ 1027 1038 1028 return cmemalignNoStats( alignment, dim, elemSize ); 1039 char * addr = (char *)memalignNoStats( alignment, size ); 1040 1041 HeapManager.Storage.Header * header; 1042 HeapManager.FreeHeader * freeElem; 1043 size_t bsize; 1044 1045 #ifndef __CFA_DEBUG__ 1046 bool mapped = 1047 #endif // __CFA_DEBUG__ 1048 headers( "cmemalign", addr, header, freeElem, bsize, alignment ); 1049 1050 // Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 1051 #ifndef __CFA_DEBUG__ 1052 if ( ! mapped ) 1053 #endif // __CFA_DEBUG__ 1054 // <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined 1055 // `-header`-addr `-size 1056 memset( addr, '\0', size ); // set to zeros 1057 1058 header->kind.real.blockSize |= 2; // mark as zero filled 1059 return addr; 1029 1060 } // cmemalign 1030 1061 … … 1065 1096 // 0p, no operation is performed. 
1066 1097 void free( void * addr ) { 1067 #ifdef __STATISTICS__1068 __atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );1069 #endif // __STATISTICS__1070 1071 1098 if ( unlikely( addr == 0p ) ) { // special case 1099 #ifdef __STATISTICS__ 1100 __atomic_add_fetch( &free_zero_calls, 1, __ATOMIC_SEQ_CST ); 1101 #endif // __STATISTICS__ 1102 1072 1103 // #ifdef __CFA_DEBUG__ 1073 1104 // if ( traceHeap() ) { … … 1182 1213 int malloc_stats_fd( int fd __attribute__(( unused )) ) { 1183 1214 #ifdef __STATISTICS__ 1184 int temp = stat _fd;1185 stat _fd = fd;1215 int temp = stats_fd; 1216 stats_fd = fd; 1186 1217 return temp; 1187 1218 #else … … 1214 1245 // The string is printed on the file stream stream. The exported string includes information about all arenas (see 1215 1246 // malloc). 1216 int malloc_info( int options, FILE * stream ) {1247 int malloc_info( int options, FILE * stream __attribute__(( unused )) ) { 1217 1248 if ( options != 0 ) { errno = EINVAL; return -1; } 1218 1249 #ifdef __STATISTICS__ … … 1243 1274 // Must have CFA linkage to overload with C linkage realloc. 1244 1275 void * resize( void * oaddr, size_t nalign, size_t size ) { 1245 #ifdef __STATISTICS__ 1246 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 1247 #endif // __STATISTICS__ 1276 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1277 if ( unlikely( size == 0 ) ) { // special cases 1278 #ifdef __STATISTICS__ 1279 __atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST ); 1280 #endif // __STATISTICS__ 1281 free( oaddr ); 1282 return 0p; 1283 } // if 1248 1284 1249 1285 if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum 1250 1286 #ifdef __CFA_DEBUG__ 1251 else 1252 checkAlign( nalign ); // check alignment 1287 else checkAlign( nalign ); // check alignment 1253 1288 #endif // __CFA_DEBUG__ 1254 1289 1255 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.1256 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases1257 1290 if ( unlikely( oaddr == 0p ) ) { 1258 1291 #ifdef __STATISTICS__ 1292 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST ); 1259 1293 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST ); 1260 1294 #endif // __STATISTICS__ … … 1302 1336 1303 1337 void * realloc( void * oaddr, size_t nalign, size_t size ) { 1338 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned. 1339 if ( unlikely( size == 0 ) ) { // special cases 1340 #ifdef __STATISTICS__ 1341 __atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST ); 1342 #endif // __STATISTICS__ 1343 free( oaddr ); 1344 return 0p; 1345 } // if 1346 1304 1347 if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum 1305 1348 #ifdef __CFA_DEBUG__ 1306 else 1307 checkAlign( nalign ); // check alignment 1349 else checkAlign( nalign ); // check alignment 1308 1350 #endif // __CFA_DEBUG__ 1309 1351 1310 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.1311 if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases1312 1352 if ( unlikely( oaddr == 0p ) ) { 1313 1353 #ifdef __STATISTICS__ -
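The heap changes split every statistic into zero-size and non-zero-size calls and short-circuit size 0 before touching the storage counters. A minimal C sketch of that counting scheme (not the CFA heap itself, names are illustrative):

// Counting zero-byte requests separately so storage totals only reflect real allocations.
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic unsigned malloc_zero_calls, malloc_calls;
static _Atomic unsigned long long malloc_storage;

static void * counted_malloc(size_t size) {
	if (size == 0) {                              // special case: count it and bail out
		atomic_fetch_add(&malloc_zero_calls, 1);
		return NULL;
	}
	atomic_fetch_add(&malloc_calls, 1);
	atomic_fetch_add(&malloc_storage, size);
	return malloc(size);
}

int main(void) {
	free(counted_malloc(0));
	free(counted_malloc(42));
	printf("malloc 0-calls %u; >0-calls %u; storage %llu bytes\n",
	       atomic_load(&malloc_zero_calls), atomic_load(&malloc_calls),
	       atomic_load(&malloc_storage));
	return 0;
}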
libcfa/src/startup.cfa
r4468a70 r342af53 10 10 // Created On : Tue Jul 24 16:21:57 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Feb 4 13:03:18 202013 // Update Count : 3 012 // Last Modified On : Sat Jan 9 23:18:23 2021 13 // Update Count : 34 14 14 // 15 15 16 #include <time.h> // tzset 17 #include <locale.h> // setlocale 16 #include <time.h> // tzset 17 #include <locale.h> // setlocale 18 #include <stdlib.h> // getenv 18 19 #include "startup.hfa" 19 20 … … 22 23 void __cfaabi_appready_startup( void ) { 23 24 tzset(); // initialize time global variables 24 setlocale( LC_NUMERIC, "");25 setlocale( LC_NUMERIC, getenv("LANG") ); 25 26 #ifdef __CFA_DEBUG__ 26 27 extern void heapAppStart(); -
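Taking LC_NUMERIC from the environment is what lets the new %'-formatted heap statistics print digit grouping. A tiny C illustration (the apostrophe flag is a POSIX/glibc extension; note that getenv may return NULL, in which case setlocale only queries the current locale rather than changing it):

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
	setlocale(LC_NUMERIC, getenv("LANG"));     // e.g. "en_US.UTF-8"
	printf("%'llu bytes\n", 123456789ull);     // prints 123,456,789 in such locales
	return 0;
}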
src/AST/Decl.cpp
r4468a70 r342af53 10 10 // Created On : Thu May 9 10:00:00 2019 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 16:23:15 201913 // Update Count : 2 012 // Last Modified On : Tue Jan 12 16:54:55 2021 13 // Update Count : 23 14 14 // 15 15 … … 78 78 79 79 const char * TypeDecl::typeString() const { 80 static const char * kindNames[] = { "sized data type", "sized object type", "sized function type", "sized tupletype" };81 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );80 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" }; 81 static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." ); 82 82 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 83 83 return sized ? kindNames[ kind ] : &kindNames[ kind ][ sizeof("sized") ]; // sizeof includes '\0' … … 85 85 86 86 const char * TypeDecl::genTypeString() const { 87 static const char * kindNames[] = { " dtype", "otype", "ftype", "ttype" };88 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." );87 static const char * kindNames[] = { "T &", "T *", "T", "(*)", "T ...", "[T]" }; 88 static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." ); 89 89 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 90 90 return kindNames[ kind ]; -
src/AST/Decl.hpp
r4468a70 r342af53 10 10 // Created On : Thu May 9 10:00:00 2019 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 17:38:33 201913 // Update Count : 2912 // Last Modified On : Mon Jan 11 20:48:38 2021 13 // Update Count : 30 14 14 // 15 15 … … 175 175 class TypeDecl final : public NamedTypeDecl { 176 176 public: 177 enum Kind { Dtype, Otype, Ftype, Ttype, NUMBER_OF_KINDS };177 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS }; 178 178 179 179 Kind kind; -
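Note on the TypeDecl changes above (both the new ast:: and old SynTree variants): the Kind enumeration grows from four to six values, adding DStype (a sized dtype) and ALtype (an array-length parameter), and every per-kind string table is guarded by a static_assert against NUMBER_OF_KINDS so a missing entry fails at compile time. A stripped-down sketch of that guard pattern, using the genTypeString strings from this changeset:

    #include <assert.h>   // static_assert (C11)
    #include <stdio.h>

    enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };

    // Surface spellings returned by genTypeString() after this change.
    static const char * genNames[] = { "T &", "T *", "T", "(*)", "T ...", "[T]" };
    static_assert( sizeof(genNames) / sizeof(genNames[0]) == NUMBER_OF_KINDS,
                   "genNames is out of sync with Kind" );

    const char * genTypeString( enum Kind kind ) { return genNames[ kind ]; }

    int main() { printf( "%s\n", genTypeString( ALtype ) ); return 0; }   // prints "[T]"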
src/Parser/DeclarationNode.cc
r4468a70 r342af53 10 10 // Created On : Sat May 16 12:34:05 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Thu Oct 8 08:03:38 202013 // Update Count : 113 512 // Last Modified On : Mon Jan 11 20:58:07 2021 13 // Update Count : 1137 14 14 // 15 15 … … 1075 1075 if ( variable.tyClass != TypeDecl::NUMBER_OF_KINDS ) { 1076 1076 // otype is internally converted to dtype + otype parameters 1077 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::D type, TypeDecl::Ftype, TypeDecl::Ttype };1078 static_assert( sizeof(kindMap) /sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." );1077 static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::DStype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::ALtype }; 1078 static_assert( sizeof(kindMap) / sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." ); 1079 1079 assertf( variable.tyClass < sizeof(kindMap)/sizeof(kindMap[0]), "Variable's tyClass is out of bounds." ); 1080 1080 TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype, variable.initializer ? variable.initializer->buildType() : nullptr ); -
src/Parser/ParseNode.h
r4468a70 r342af53 10 10 // Created On : Sat May 16 13:28:16 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : S at Oct 24 03:53:54 202013 // Update Count : 89 512 // Last Modified On : Sun Jan 3 18:23:01 2021 13 // Update Count : 896 14 14 // 15 15 … … 39 39 struct DeclarationNode; 40 40 class DeclarationWithType; 41 class Initializer; 41 42 class ExpressionNode; 42 class Initializer;43 43 struct StatementNode; 44 44 -
src/Parser/parser.yy
r4468a70 r342af53 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Sat Oct 24 08:21:14 202013 // Update Count : 46 2412 // Last Modified On : Mon Jan 11 21:32:10 2021 13 // Update Count : 4633 14 14 // 15 15 … … 329 329 %type<en> conditional_expression constant_expression assignment_expression assignment_expression_opt 330 330 %type<en> comma_expression comma_expression_opt 331 %type<en> argument_expression_list_opt argument_expression default_initialize _opt331 %type<en> argument_expression_list_opt argument_expression default_initializer_opt 332 332 %type<ifctl> if_control_expression 333 333 %type<fctl> for_control_expression for_control_expression_list … … 424 424 %type<decl> sue_declaration_specifier sue_declaration_specifier_nobody sue_type_specifier sue_type_specifier_nobody 425 425 426 %type<tclass> type_class 426 %type<tclass> type_class new_type_class 427 427 %type<decl> type_declarator type_declarator_name type_declaring_list 428 428 … … 1545 1545 | cfa_function_declaration 1546 1546 | type_declaring_list 1547 { SemanticError( yylloc, "otype declaration is currently unimplemented." ); $$ = nullptr; } 1547 1548 | trait_specifier 1548 1549 ; … … 2223 2224 ; 2224 2225 2225 cfa_parameter_ellipsis_list_opt: 2226 cfa_parameter_ellipsis_list_opt: // CFA, abstract + real 2226 2227 // empty 2227 2228 { $$ = DeclarationNode::newBasicType( DeclarationNode::Void ); } … … 2280 2281 cfa_parameter_declaration: // CFA, new & old style parameter declaration 2281 2282 parameter_declaration 2282 | cfa_identifier_parameter_declarator_no_tuple identifier_or_type_name default_initialize _opt2283 | cfa_identifier_parameter_declarator_no_tuple identifier_or_type_name default_initializer_opt 2283 2284 { $$ = $1->addName( $2 ); } 2284 | cfa_abstract_tuple identifier_or_type_name default_initialize _opt2285 | cfa_abstract_tuple identifier_or_type_name default_initializer_opt 2285 2286 // To obtain LR(1), these rules must be duplicated here (see cfa_abstract_declarator). 2286 2287 { $$ = $1->addName( $2 ); } 2287 | type_qualifier_list cfa_abstract_tuple identifier_or_type_name default_initialize _opt2288 | type_qualifier_list cfa_abstract_tuple identifier_or_type_name default_initializer_opt 2288 2289 { $$ = $2->addName( $3 )->addQualifiers( $1 ); } 2289 2290 | cfa_function_specifier … … 2302 2303 parameter_declaration: 2303 2304 // No SUE declaration in parameter list. 2304 declaration_specifier_nobody identifier_parameter_declarator default_initialize _opt2305 declaration_specifier_nobody identifier_parameter_declarator default_initializer_opt 2305 2306 { $$ = $2->addType( $1 )->addInitializer( $3 ? new InitializerNode( $3 ) : nullptr ); } 2306 | declaration_specifier_nobody type_parameter_redeclarator default_initialize _opt2307 | declaration_specifier_nobody type_parameter_redeclarator default_initializer_opt 2307 2308 { $$ = $2->addType( $1 )->addInitializer( $3 ? new InitializerNode( $3 ) : nullptr ); } 2308 2309 ; 2309 2310 2310 2311 abstract_parameter_declaration: 2311 declaration_specifier_nobody default_initialize _opt2312 declaration_specifier_nobody default_initializer_opt 2312 2313 { $$ = $1->addInitializer( $2 ? new InitializerNode( $2 ) : nullptr ); } 2313 | declaration_specifier_nobody abstract_parameter_declarator default_initialize _opt2314 | declaration_specifier_nobody abstract_parameter_declarator default_initializer_opt 2314 2315 { $$ = $2->addType( $1 )->addInitializer( $3 ? 
new InitializerNode( $3 ) : nullptr ); } 2315 2316 ; … … 2441 2442 type_class identifier_or_type_name 2442 2443 { typedefTable.addToScope( *$2, TYPEDEFname, "9" ); } 2443 type_initializer_opt assertion_list_opt2444 type_initializer_opt assertion_list_opt 2444 2445 { $$ = DeclarationNode::newTypeParam( $1, $2 )->addTypeInitializer( $4 )->addAssertions( $5 ); } 2445 | type_specifier identifier_parameter_declarator 2446 | identifier_or_type_name new_type_class 2447 { typedefTable.addToScope( *$1, TYPEDEFname, "9" ); } 2448 type_initializer_opt assertion_list_opt 2449 { $$ = DeclarationNode::newTypeParam( $2, $1 )->addTypeInitializer( $4 )->addAssertions( $5 ); } 2450 | '[' identifier_or_type_name ']' 2451 { 2452 typedefTable.addToScope( *$2, TYPEDEFname, "9" ); 2453 $$ = DeclarationNode::newTypeParam( TypeDecl::ALtype, $2 ); 2454 } 2455 // | type_specifier identifier_parameter_declarator 2446 2456 | assertion_list 2447 2457 { $$ = DeclarationNode::newTypeParam( TypeDecl::Dtype, new string( DeclarationNode::anonymous.newName() ) )->addAssertions( $1 ); } 2458 ; 2459 2460 new_type_class: // CFA 2461 // empty 2462 { $$ = TypeDecl::Otype; } 2463 | '&' 2464 { $$ = TypeDecl::Dtype; } 2465 | '*' 2466 { $$ = TypeDecl::DStype; } // dtype + sized 2467 | ELLIPSIS 2468 { $$ = TypeDecl::Ttype; } 2448 2469 ; 2449 2470 … … 3476 3497 ; 3477 3498 3478 default_initialize _opt:3499 default_initializer_opt: 3479 3500 // empty 3480 3501 { $$ = nullptr; } -
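Note on the parser.yy changes above: besides renaming default_initialize_opt to default_initializer_opt and flagging otype declarations outside traits as unimplemented, the grammar gains a new_type_class nonterminal, so a forall parameter can be written as a name followed by a marker: no marker for otype, & for dtype, * for a sized dtype, ... for ttype, plus a bracketed form for the new array-length kind. A hypothetical Cforall sketch of what these shorthands appear to declare, inferred only from the grammar rules above (how much of this is usable beyond parsing at this point in the branch is not established by this diff):

    forall( T )                          // no marker: otype, a complete type with copy/assignment
    T identity( T x ) { return x; }

    forall( T & )                        // '&': dtype, an opaque (possibly incomplete) data type
    T * same( T * p ) { return p; }

    forall( T * )                        // '*': dtype + sized, size and alignment are known
    void * erase( T * p ) { return p; }

    forall( T ... )                      // '...': ttype, a flattened parameter pack
    void drop( T pack );

    forall( [N] )                        // '[N]': array-length parameter (TypeDecl::ALtype)
    void with_dimension( void );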
src/ResolvExpr/ResolveAssertions.cc
r4468a70 r342af53 397 397 398 398 /// Limit to depth of recursion of assertion satisfaction 399 static const int recursionLimit = 4;399 static const int recursionLimit = 7; 400 400 /// Maximum number of simultaneously-deferred assertions to attempt concurrent satisfaction of 401 401 static const int deferLimit = 10; -
src/ResolvExpr/SatisfyAssertions.cpp
r4468a70 r342af53 57 57 ast::UniqueId resnSlot; ///< Slot for any recursive assertion IDs 58 58 59 AssnCandidate( 60 const ast::SymbolTable::IdData c, const ast::Type * at, ast::TypeEnvironment && e, 59 AssnCandidate( 60 const ast::SymbolTable::IdData c, const ast::Type * at, ast::TypeEnvironment && e, 61 61 ast::AssertionSet && h, ast::AssertionSet && n, ast::OpenVarSet && o, ast::UniqueId rs ) 62 : cdata( c ), adjType( at ), env( std::move( e ) ), have( std::move( h ) ), 62 : cdata( c ), adjType( at ), env( std::move( e ) ), have( std::move( h ) ), 63 63 need( std::move( n ) ), open( std::move( o ) ), resnSlot( rs ) {} 64 64 }; … … 73 73 const AssnCandidate & match; 74 74 }; 75 76 /// Wrapper for the deferred items from a single assertion satisfaction. 75 76 /// Wrapper for the deferred items from a single assertion satisfaction. 77 77 /// Acts like an indexed list of DeferRef 78 78 struct DeferItem { … … 81 81 AssnCandidateList matches; 82 82 83 DeferItem( 83 DeferItem( 84 84 const ast::VariableExpr * d, const ast::AssertionSetValue & i, AssnCandidateList && ms ) 85 85 : expr( d ), info( i ), matches( std::move( ms ) ) {} … … 117 117 /// Initial satisfaction state for a candidate 118 118 SatState( CandidateRef & c, const ast::SymbolTable & syms ) 119 : cand( c ), need(), newNeed(), deferred(), inferred(), costs{ Cost::zero }, 119 : cand( c ), need(), newNeed(), deferred(), inferred(), costs{ Cost::zero }, 120 120 symtab( syms ) { need.swap( c->need ); } 121 121 122 122 /// Update satisfaction state for next step after previous state 123 123 SatState( SatState && o, IterateFlag ) 124 : cand( std::move( o.cand ) ), need( o.newNeed.begin(), o.newNeed.end() ), newNeed(), 125 deferred(), inferred( std::move( o.inferred ) ), costs( std::move( o.costs ) ), 124 : cand( std::move( o.cand ) ), need( o.newNeed.begin(), o.newNeed.end() ), newNeed(), 125 deferred(), inferred( std::move( o.inferred ) ), costs( std::move( o.costs ) ), 126 126 symtab( o.symtab ) { costs.emplace_back( Cost::zero ); } 127 127 128 128 /// Field-wise next step constructor 129 129 SatState( 130 CandidateRef && c, ast::AssertionSet && nn, InferCache && i, CostVec && cs, 130 CandidateRef && c, ast::AssertionSet && nn, InferCache && i, CostVec && cs, 131 131 ast::SymbolTable && syms ) 132 : cand( std::move( c ) ), need( nn.begin(), nn.end() ), newNeed(), deferred(), 132 : cand( std::move( c ) ), need( nn.begin(), nn.end() ), newNeed(), deferred(), 133 133 inferred( std::move( i ) ), costs( std::move( cs ) ), symtab( std::move( syms ) ) 134 134 { costs.emplace_back( Cost::zero ); } … … 143 143 144 144 /// Binds a single assertion, updating satisfaction state 145 void bindAssertion( 146 const ast::VariableExpr * expr, const ast::AssertionSetValue & info, CandidateRef & cand, 145 void bindAssertion( 146 const ast::VariableExpr * expr, const ast::AssertionSetValue & info, CandidateRef & cand, 147 147 AssnCandidate & match, InferCache & inferred 148 148 ) { 149 149 const ast::DeclWithType * candidate = match.cdata.id; 150 assertf( candidate->uniqueId, 150 assertf( candidate->uniqueId, 151 151 "Assertion candidate does not have a unique ID: %s", toString( candidate ).c_str() ); 152 152 153 153 ast::Expr * varExpr = match.cdata.combine( cand->expr->location, cand->cvtCost ); 154 154 varExpr->result = match.adjType; … … 201 201 ast::OpenVarSet newOpen{ sat.cand->open }; 202 202 ast::ptr< ast::Type > toType = assn.first->result; 203 ast::ptr< ast::Type > adjType = 203 ast::ptr< ast::Type > adjType = 204 204 renameTyVars( 
adjustExprType( candidate->get_type(), newEnv, sat.symtab ), GEN_USAGE, false ); 205 205 … … 213 213 } 214 214 215 matches.emplace_back( 215 matches.emplace_back( 216 216 cdata, adjType, std::move( newEnv ), std::move( have ), std::move( newNeed ), 217 217 std::move( newOpen ), crntResnSlot ); … … 287 287 }; 288 288 289 /// Replace ResnSlots with InferParams and add alternative to output list, if it meets pruning 289 /// Replace ResnSlots with InferParams and add alternative to output list, if it meets pruning 290 290 /// threshold. 291 void finalizeAssertions( 292 CandidateRef & cand, InferCache & inferred, PruneMap & thresholds, CostVec && costs, 293 CandidateList & out 291 void finalizeAssertions( 292 CandidateRef & cand, InferCache & inferred, PruneMap & thresholds, CostVec && costs, 293 CandidateList & out 294 294 ) { 295 295 // prune if cheaper alternative for same key has already been generated … … 308 308 } 309 309 310 /// Combo iterator that combines candidates into an output list, merging their environments. 311 /// Rejects an appended candidate if environments cannot be merged. See `Common/FilterCombos.h` 310 /// Combo iterator that combines candidates into an output list, merging their environments. 311 /// Rejects an appended candidate if environments cannot be merged. See `Common/FilterCombos.h` 312 312 /// for description of "combo iterator". 313 313 class CandidateEnvMerger { … … 390 390 } // anonymous namespace 391 391 392 void satisfyAssertions( 393 CandidateRef & cand, const ast::SymbolTable & symtab, CandidateList & out, 392 void satisfyAssertions( 393 CandidateRef & cand, const ast::SymbolTable & symtab, CandidateList & out, 394 394 std::vector<std::string> & errors 395 395 ) { … … 418 418 419 419 // should a limit be imposed? worst case here is O(n^2) but very unlikely to happen. 
420 for (unsigned resetCount = 0; ; ++resetCount) { 420 for (unsigned resetCount = 0; ; ++resetCount) { 421 421 ast::AssertionList next; 422 422 resetTyVarRenaming(); … … 449 449 // either add successful match or push back next state 450 450 if ( sat.newNeed.empty() ) { 451 finalizeAssertions( 451 finalizeAssertions( 452 452 sat.cand, sat.inferred, thresholds, std::move( sat.costs ), out ); 453 453 } else { … … 471 471 std::vector< CandidateEnvMerger::OutType > compatible = filterCombos( 472 472 sat.deferred, CandidateEnvMerger{ sat.cand->env, sat.cand->open, sat.symtab } ); 473 473 474 474 // fail early if no mutually-compatible assertion satisfaction 475 475 if ( compatible.empty() ) { … … 494 494 // set up next satisfaction state 495 495 CandidateRef nextCand = std::make_shared<Candidate>( 496 sat.cand->expr, std::move( compat.env ), std::move( compat.open ), 496 sat.cand->expr, std::move( compat.env ), std::move( compat.open ), 497 497 ast::AssertionSet{} /* need moved into satisfaction state */, 498 498 sat.cand->cost, sat.cand->cvtCost ); … … 500 500 ast::AssertionSet nextNewNeed{ sat.newNeed }; 501 501 InferCache nextInferred{ sat.inferred }; 502 502 503 503 CostVec nextCosts{ sat.costs }; 504 504 nextCosts.back() += compat.cost; 505 505 506 506 ast::SymbolTable nextSymtab{ sat.symtab }; 507 507 … … 517 517 // either add successful match or push back next state 518 518 if ( nextNewNeed.empty() ) { 519 finalizeAssertions( 519 finalizeAssertions( 520 520 nextCand, nextInferred, thresholds, std::move( nextCosts ), out ); 521 521 } else { 522 nextSats.emplace_back( 523 std::move( nextCand ), std::move( nextNewNeed ), 524 std::move( nextInferred ), std::move( nextCosts ), 522 nextSats.emplace_back( 523 std::move( nextCand ), std::move( nextNewNeed ), 524 std::move( nextInferred ), std::move( nextCosts ), 525 525 std::move( nextSymtab ) ); 526 526 } … … 534 534 nextSats.clear(); 535 535 } 536 536 537 537 // exceeded recursion limit if reaches here 538 538 if ( out.empty() ) { -
src/SymTab/Demangle.cc
r4468a70 r342af53 10 10 // Created On : Thu Jul 19 12:52:41 2018 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Tue Feb 11 15:09:18 202013 // Update Count : 1 012 // Last Modified On : Mon Jan 11 21:28:27 2021 13 // Update Count : 11 14 14 // 15 15 … … 367 367 // type variable types 368 368 for (size_t k = 0; k < TypeDecl::NUMBER_OF_KINDS; ++k) { 369 static const std::string typeVariableNames[] = { "DT", " OT", "FT", "TT", };369 static const std::string typeVariableNames[] = { "DT", "DST", "OT", "FT", "TT", "ALT", }; 370 370 static_assert( 371 371 sizeof(typeVariableNames)/sizeof(typeVariableNames[0]) == TypeDecl::NUMBER_OF_KINDS, -
src/SymTab/Mangler.cc
r4468a70 r342af53 10 10 // Created On : Sun May 17 21:40:29 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Wed Nov 18 12:01:38 202013 // Update Count : 6412 // Last Modified On : Mon Jan 11 21:56:06 2021 13 // Update Count : 74 14 14 // 15 15 #include "Mangler.h" … … 313 313 // and the case has not yet come up in practice. Alternatively, if not then this code can be removed 314 314 // aside from the assert false. 315 assertf( false, "Mangler_old should not visit typedecl: %s", toCString(decl));315 assertf( false, "Mangler_old should not visit typedecl: %s", toCString(decl)); 316 316 assertf( decl->kind < TypeDecl::NUMBER_OF_KINDS, "Unhandled type variable kind: %d", decl->kind ); 317 317 mangleName += Encoding::typeVariables[ decl->kind ] + std::to_string( decl->name.length() ) + decl->name; … … 343 343 break; 344 344 default: 345 assert ( false);345 assertf( false, "unimplemented kind for type variable %s", SymTab::Mangler::Encoding::typeVariables[i->kind].c_str() ); 346 346 } // switch 347 347 varNums[ i->name ] = std::make_pair( nextVarNum, (int)i->kind ); … … 673 673 for ( auto & decl : ptype->forall ) { 674 674 switch ( decl->kind ) { 675 case ast::TypeDecl::Kind::Dtype:675 case ast::TypeDecl::Kind::Dtype: 676 676 dcount++; 677 677 break; 678 case ast::TypeDecl::Kind::Ftype:678 case ast::TypeDecl::Kind::Ftype: 679 679 fcount++; 680 680 break; 681 case ast::TypeDecl::Kind::Ttype:681 case ast::TypeDecl::Kind::Ttype: 682 682 vcount++; 683 683 break; 684 default:685 assert ( false);684 default: 685 assertf( false, "unimplemented kind for type variable %s", SymTab::Mangler::Encoding::typeVariables[decl->kind].c_str() ); 686 686 } // switch 687 687 varNums[ decl->name ] = std::make_pair( nextVarNum, (int)decl->kind ); -
src/SymTab/ManglerCommon.cc
r4468a70 r342af53 10 10 // Created On : Sun May 17 21:44:03 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 14:54:38 201913 // Update Count : 2 812 // Last Modified On : Mon Jan 11 21:23:10 2021 13 // Update Count : 29 14 14 // 15 15 … … 104 104 const std::string typeVariables[] = { 105 105 "BD", // dtype 106 "BDS", // dtype + sized 106 107 "BO", // otype 107 108 "BF", // ftype 108 109 "BT", // ttype 110 "BAL", // array length type 109 111 }; 110 112 static_assert( 111 sizeof(typeVariables) /sizeof(typeVariables[0]) == TypeDecl::NUMBER_OF_KINDS,113 sizeof(typeVariables) / sizeof(typeVariables[0]) == TypeDecl::NUMBER_OF_KINDS, 112 114 "Each type variable kind should have a corresponding mangler prefix" 113 115 ); -
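Note on the mangler/demangler changes above: each of the six kinds now has a mangling prefix (ManglerCommon.cc) and a matching demangler tag (Demangle.cc), with static_asserts keeping both tables the same length as NUMBER_OF_KINDS. A small C-style sketch that gathers the new encodings in one place; the lookup checks longer prefixes first so "BDS" and "BAL" are not mistaken for "BD":

    #include <stdio.h>
    #include <string.h>

    // Prefixes and kinds copied from the diffs above, ordered longest-prefix first.
    static const char * prefix[] = { "BDS", "BAL", "BD", "BO", "BF", "BT" };
    static const char * kind[]   = { "sized dtype", "array length", "dtype", "otype", "ftype", "ttype" };

    // Report which type-variable kind a mangled fragment starts with
    // (illustration only, not the compiler's real demangler).
    static const char * kind_of( const char * s ) {
        for ( int k = 0; k < 6; k += 1 )
            if ( strncmp( s, prefix[k], strlen( prefix[k] ) ) == 0 ) return kind[k];
        return "unknown";
    }

    int main() { printf( "%s\n", kind_of( "BAL1N" ) ); return 0; }   // prints "array length"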
src/SynTree/Declaration.h
r4468a70 r342af53 10 10 // Created On : Mon May 18 07:44:20 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Fri Dec 13 23:11:22 201913 // Update Count : 15 712 // Last Modified On : Mon Jan 11 20:48:39 2021 13 // Update Count : 158 14 14 // 15 15 … … 201 201 typedef NamedTypeDecl Parent; 202 202 public: 203 enum Kind { Dtype, Otype, Ftype, Ttype, NUMBER_OF_KINDS };203 enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS }; 204 204 205 205 Kind kind; -
src/SynTree/TypeDecl.cc
r4468a70 r342af53 10 10 // Created On : Mon May 18 07:44:20 2015 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : T hu Oct 8 18:18:55 202013 // Update Count : 2 212 // Last Modified On : Tue Jan 12 16:07:33 2021 13 // Update Count : 26 14 14 // 15 15 … … 33 33 34 34 const char * TypeDecl::typeString() const { 35 static const char * kindNames[] = { "sized data type", "sized object type", "sized function type", "sized tupletype" };36 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );35 static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" }; 36 static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." ); 37 37 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 38 38 return isComplete() ? kindNames[ kind ] : &kindNames[ kind ][ sizeof("sized") ]; // sizeof includes '\0' … … 40 40 41 41 const char * TypeDecl::genTypeString() const { 42 static const char * kindNames[] = { " dtype", "otype", "ftype", "ttype" };43 static_assert( sizeof(kindNames) /sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." );42 static const char * kindNames[] = { "T &", "T *", "T", "(*)", "T ...", "[T]" }; 43 static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "genTypeString: kindNames is out of sync." ); 44 44 assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." ); 45 45 return kindNames[ kind ]; -
tests/Makefile.am
r4468a70 r342af53 103 103 104 104 mostlyclean-local : 105 find ${builddir} -not -path './__pycache__/*' -path '*.o' -delete 106 find ${builddir} -not -path './__pycache__/*' -path '*/.err/*.log' -delete 107 find ${builddir} -not -path './__pycache__/*' -path '*/.out/*.log' -delete 105 108 rm -f ${EXTRA_PROGRAMS} 106 109 rm -rf __pycache__ 107 find ${builddir} -path '*.o' -delete108 find ${builddir} -path '*/.err/*.log' -delete109 find ${builddir} -path '*/.out/*.log' -delete110 110 111 111 distclean-local : -
tests/concurrent/futures/.expect/basic.txt
r4468a70 r342af53 1 start 1 2 done -
tests/concurrent/futures/basic.cfa
r4468a70 r342af53 1 1 #include <thread.hfa> 2 2 3 enum {NFUTURES = 10}; 3 4 … … 83 84 84 85 int main() { 86 printf( "start\n" ); // non-empty .expect file 85 87 processor procs[2]; 86 88 {