Changes in / [da3963a:565acf59]


Ignore:
Files:
6 added
30 edited

Legend:

Unmodified
Added
Removed
  • benchmark/io/http/http_ring.cpp

    rda3963a r565acf59  
    99#include <liburing.h>
    1010
    11 typedef enum {
    12         EVENT_END,
    13         EVENT_ACCEPT,
    14         EVENT_REQUEST,
    15         EVENT_ANSWER
    16 } event_t;
    17 
    18 struct __attribute__((aligned(128))) request_t {
    19         event_t type;
    20         int fd;
    21         size_t length;
    22         char * buff;
    23         char data[0];
    24 
    25         static struct request_t * create(event_t type, size_t extra) {
    26                 auto ret = (struct request_t *)malloc(sizeof(struct request_t) + extra);
    27                 ret->type = type;
    28                 ret->length = extra;
    29                 ret->buff = ret->data;
    30                 return ret;
    31         }
    32 
    33         static struct request_t * create(event_t type) {
    34                 return create(type, 0);
    35         }
    36 };
    37 
     11// #define NOBATCHING
     12// #define USE_ASYNC
     13
     14// Options passed to each thread
    3815struct __attribute__((aligned(128))) options_t {
     16        // Data passed to accept
    3917        struct {
    4018                int sockfd;
     
    4422        } acpt;
    4523
     24        // Termination notification
    4625        int endfd;
     26
     27        // The ring to use for io
    4728        struct io_uring * ring;
    48 
     29};
     30
     31//=========================================================
     32// General statistics
     33struct __attribute__((aligned(128))) stats_block_t {
    4934        struct {
    50                 size_t subs = 0;
    51                 size_t cnts = 0;
    52         } result;
     35                volatile size_t conns = 0;
     36                volatile size_t reads = 0;
     37                volatile size_t writes = 0;
     38                volatile size_t full_writes = 0;
     39        } completions;
     40
     41        struct {
     42                volatile size_t conns = 0;
     43                struct {
     44                        volatile size_t pipes = 0;
     45                        volatile size_t reset = 0;
     46                        volatile size_t other = 0;
     47                } requests;
     48
     49                struct {
     50                        volatile size_t pipes = 0;
     51                        volatile size_t reset = 0;
     52                        volatile size_t other = 0;
     53                } answers;
     54        } errors;
     55
     56        struct {
     57                volatile size_t current = 0;
     58                volatile size_t max = 0;
     59                volatile size_t used = 0;
     60        } conns;
     61
     62        volatile size_t recycle_errors = 0;
    5363};
    5464
     65// Each thread gets its own block of stats
     66// and there is a global block for tallying at the end
     67thread_local stats_block_t stats;
     68stats_block_t global_stats;
     69
     70// Get an array of current connections
     71// This is just for debugging, to make sure
     72// no two state-machines get the same fd
     73const size_t array_max = 25000;
     74class connection * volatile conns[array_max] = { 0 };
     75
     76// Max fd we've seen, keep track so it's convenient to adjust the array size after
     77volatile int max_fd = 0;
     78
    5579//=========================================================
     80// Some small wrappers for ring operations used outside the connection state machine
     81// get sqe + error handling
    5682static struct io_uring_sqe * get_sqe(struct io_uring * ring) {
    5783        struct io_uring_sqe * sqe = io_uring_get_sqe(ring);
     
    6389}
    6490
    65 static void submit(struct io_uring * ) {
    66         // io_uring_submit(ring);
    67 }
    68 
    69 //=========================================================
     91// read of the event fd is not done by a connection
     92// use nullptr as the user data
    7093static void ring_end(struct io_uring * ring, int fd, char * buffer, size_t len) {
    7194        struct io_uring_sqe * sqe = get_sqe(ring);
    7295        io_uring_prep_read(sqe, fd, buffer, len, 0);
    73         io_uring_sqe_set_data(sqe, request_t::create(EVENT_END));
    74         submit(ring);
     96        io_uring_sqe_set_data(sqe, nullptr);
     97        io_uring_submit(ring);
    7598}
    7699
    77 static void ring_accept(struct io_uring * ring, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) {
    78         auto req = request_t::create(EVENT_ACCEPT);
    79         struct io_uring_sqe * sqe = get_sqe(ring);
    80         io_uring_prep_accept(sqe, sockfd, addr, addrlen, flags);
    81         io_uring_sqe_set_data(sqe, req);
    82         submit(ring);
    83         // std::cout << "Submitted accept: " << req << std::endl;
    84 }
    85 
    86 static void ring_request(struct io_uring * ring, int fd) {
    87         size_t size = 1024;
    88         auto req = request_t::create(EVENT_REQUEST, size);
    89         req->fd = fd;
    90 
    91         struct io_uring_sqe * sqe = get_sqe(ring);
    92         io_uring_prep_read(sqe, fd, req->buff, size, 0);
    93         io_uring_sqe_set_data(sqe, req);
    94         submit(ring);
    95         // std::cout << "Submitted request: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
    96 }
    97 
    98100//=========================================================
     101// All answers are fixed and determined by the return code
    99102enum HttpCode {
    100103        OK200 = 0,
     
    108111};
    109112
     113// Get a fixed reply based on the return code
    110114const char * http_msgs[] = {
    111         "HTTP/1.1 200 OK\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: %zu \n\n%s",
    112         "HTTP/1.1 400 Bad Request\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
    113         "HTTP/1.1 404 Not Found\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
    114         "HTTP/1.1 405 Method Not Allowed\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
    115         "HTTP/1.1 408 Request Timeout\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
    116         "HTTP/1.1 413 Payload Too Large\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
    117         "HTTP/1.1 414 URI Too Long\nServer: HttoForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
     115        "HTTP/1.1 200 OK\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 15\r\nConnection: keep-alive\r\n\r\nHello, World!\r\n",
     116        "HTTP/1.1 400 Bad Request\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
     117        "HTTP/1.1 404 Not Found\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
     118        "HTTP/1.1 405 Method Not \r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
     119        "HTTP/1.1 408 Request Timeout\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
     120        "HTTP/1.1 413 Payload Too Large\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
     121        "HTTP/1.1 414 URI Too Long\r\nServer: HttoForall\r\nContent-Type: text/plain\r\nContent-Length: 0 \r\n\r\n",
    118122};
    119 
    120 static_assert( KNOWN_CODES == (sizeof(http_msgs ) / sizeof(http_msgs [0])));
    121 
    122 const int http_codes[] = {
    123         200,
    124         400,
    125         404,
    126         405,
    127         408,
    128         413,
    129         414,
     123static_assert( KNOWN_CODES == (sizeof(http_msgs) / sizeof(http_msgs[0])) );
     124
     125// Pre-compute the length of these replies
     126const size_t http_lens[] = {
     127        strlen(http_msgs[0]),
     128        strlen(http_msgs[1]),
     129        strlen(http_msgs[2]),
     130        strlen(http_msgs[3]),
     131        strlen(http_msgs[4]),
     132        strlen(http_msgs[5]),
     133        strlen(http_msgs[6]),
    130134};
    131 
    132 static_assert( KNOWN_CODES == (sizeof(http_codes) / sizeof(http_codes[0])));
    133 
    134 int code_val(HttpCode code) {
    135         return http_codes[code];
    136 }
    137 
    138 static void ring_answer(struct io_uring * ring, int fd, HttpCode code) {
    139         size_t size = 256;
    140         auto req = request_t::create(EVENT_ANSWER, size);
    141         req->fd = fd;
    142 
    143         const char * fmt = http_msgs[code];
    144         const char * date = "";
    145         size = snprintf(req->buff, size, fmt, date, size);
    146 
    147         struct io_uring_sqe * sqe = get_sqe(ring);
    148         io_uring_prep_write(sqe, fd, req->buff, size, 0);
    149         io_uring_sqe_set_data(sqe, req);
    150         submit(ring);
    151         // std::cout << "Submitted good answer: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
    152 }
    153 
    154 static void ring_answer(struct io_uring * ring, int fd, const std::string &) {
    155         // size_t size = 256;
    156         // auto req = request_t::create(EVENT_ANSWER, size);
    157         // req->fd = fd;
    158 
    159         // const char * fmt = http_msgs[OK200];
    160         // const char * date = "";
    161         // size_t len = snprintf(req->buffer, size, fmt, date, ans.size(), ans.c_str());
    162         // req->length = len;
    163 
    164         // struct io_uring_sqe * sqe = get_sqe(ring);
    165         // io_uring_prep_write(sqe, fd, req->buffer, len, 0);
    166         // io_uring_sqe_set_data(sqe, req);
    167         // submit(ring);
    168         // std::cout << "Submitted good answer: " << req << " (" << (void*)req->buffer << ")"<<std::endl;
    169 
    170 
    171         static const char* RESPONSE = "HTTP/1.1 200 OK\r\n" \
    172                                                 "Content-Length: 15\r\n" \
    173                                                 "Content-Type: text/html\r\n" \
    174                                                 "Connection: keep-alive\r\n" \
    175                                                 "Server: testserver\r\n" \
    176                                                 "\r\n" \
    177                                                 "Hello, World!\r\n";
    178 
    179         static const size_t RLEN = strlen(RESPONSE);
    180 
    181         size_t size = 256;
    182         auto req = request_t::create(EVENT_ANSWER, size);
    183         req->fd = fd;
    184         req->buff = (char*)RESPONSE;
    185         req->length = RLEN;
    186 
    187         // const char * fmt = http_msgs[OK200];
    188         // const char * date = "";
    189         // size_t len = snprintf(req->buffer, size, fmt, date, ans.size(), ans.c_str());
    190         // req->length = len;
    191 
    192         struct io_uring_sqe * sqe = get_sqe(ring);
    193         io_uring_prep_write(sqe, fd, RESPONSE, RLEN, 0);
    194         io_uring_sqe_set_data(sqe, req);
    195         submit(ring);
    196 }
     135static_assert( KNOWN_CODES == (sizeof(http_lens) / sizeof(http_lens[0])) );
    197136
    198137//=========================================================
    199 static void handle_new_conn(struct io_uring * ring, int fd) {
    200         if( fd < 0 ) {
    201                 int err = -fd;
    202                 if( err == ECONNABORTED ) return;
    203                 std::cerr << "accept error: (" << errno << ") " << strerror(errno) << std::endl;
    204                 exit(EXIT_FAILURE);
    205         }
    206 
    207         ring_request(ring, fd);
    208 }
    209 
    210 static void handle_request(struct io_uring * ring, struct request_t * in, int res) {
    211         if( res < 0 ) {
    212                 int err = -res;
    213                 switch(err) {
    214                         case EPIPE:
    215                         case ECONNRESET:
    216                                 close(in->fd);
    217                                 free(in);
     138// Finite state machine responsible for handling each connection
     139class __attribute__((aligned(128))) connection {
     140private:
     141        // The state of the machine
     142        enum {
     143                ACCEPTING,  // Accept sent waiting for connection
     144                REQUESTING, // Waiting for new request
     145                ANSWERING,  // Either request received submitting answer or short answer sent, need to submit rest
     146        } state;
     147
     148        // The file descriptor of the connection
     149        int fd;
     150
     151        // request data
     152        static const size_t buffer_size = 1024; // Size of the read buffer
     153        const char * buffer;                      // Buffer into which requests are read
     154
     155        // send data
     156        size_t to_send;         // Data left to send
     157        const char * iterator;  // Pointer to rest of the message to send
     158
     159        // stats
     160        // how many requests/answers were complete, that is, a valid cqe was obtained
     161        struct {
     162                size_t requests = 0;
     163                size_t answers = 0;
     164        } stats;
     165
     166private:
     167        connection()
     168                : state(ACCEPTING)
     169                , fd(0)
     170                , buffer( new char[buffer_size])
     171                , iterator(nullptr)
     172        {}
     173
     174        ~connection() {
     175                delete [] buffer;
     176                ::stats.conns.current--;
     177        }
     178
     179        // Close the current connection
     180        void close(int err) {
     181                // std::cout << "(" << this->stats.requests << "," << this->stats.answers << ", e" << err << ") ";
     182                conns[fd] = nullptr;
     183
     184                if(fd != 0) {
     185                        ::close(fd);
     186                }
     187                delete this;
     188        }
     189
     190        //--------------------------------------------------
     191        // Wrappers for submit so we can tweak it more easily
     192        static void submit(struct io_uring * ring, struct io_uring_sqe * sqe, connection * conn) {
     193                (void)ring;
     194                #ifdef USE_ASYNC
     195                        io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
     196                #endif
     197                io_uring_sqe_set_data(sqe, conn);
     198                #ifdef NOBATCHING
     199                        io_uring_submit(ring);
     200                #endif
     201        }
     202
     203        void submit(struct io_uring * ring, struct io_uring_sqe * sqe) {
     204                submit(ring, sqe, this);
     205        }
     206
     207        //--------------------------------------------------
     208        // get a new request from the client
     209        void request(struct io_uring * ring) {
     210                state = REQUESTING;
     211                struct io_uring_sqe * sqe = get_sqe(ring);
     212                io_uring_prep_recv(sqe, fd, (void*)buffer, buffer_size, 0);
     213                submit(ring, sqe);
     214        }
     215
     216        //--------------------------------------------------
     217        // Send a new answer based on a return code
     218        void answer(struct io_uring * ring, HttpCode code) {
     219                iterator = http_msgs[code];
     220                to_send  = http_lens[code];
     221                if(to_send != 124) {
     222                        std::cerr << "Answer has weird size: " << to_send << " (" << (int)code << ")" << std::endl;
     223                }
     224                answer(ring);
     225        }
     226
     227        // send a new answer to the client
     228        // Reused for incomplete writes
     229        void answer(struct io_uring * ring) {
     230                state = ANSWERING;
     231                struct io_uring_sqe * sqe = get_sqe(ring);
     232                io_uring_prep_send(sqe, fd, iterator, to_send, 0);
     233                submit(ring, sqe);
     234        }
     235
     236        //--------------------------------------------------
     237        // Handle a new connection: the result of getting a cqe while in the ACCEPTING state
     238        void newconn(struct io_uring * ring, int ret) {
     239                // Check errors
     240                if( ret < 0 ) {
     241                        int err = -ret;
     242                        if( err == ECONNABORTED ) {
     243                                ::stats.errors.conns++;
     244                                this->close(err);
    218245                                return;
    219                         default:
    220                                 std::cerr << "request error: (" << err << ") " << strerror(err) << std::endl;
    221                                 exit(EXIT_FAILURE);
    222                 }
    223         }
    224 
    225         if(res == 0) {
    226                 close(in->fd);
    227                 free(in);
    228                 return;
    229         }
    230 
    231         const char * it = in->buff;
    232         if( !strstr( it, "\r\n\r\n" ) ) {
    233                 std::cout << "Incomplete request" << std::endl;
    234                 close(in->fd);
    235                 free(in);
    236                 return;
    237         }
    238 
    239         it = in->buff;
    240         const std::string reply = "Hello, World!\n";
    241         int ret = memcmp(it, "GET ", 4);
    242         if( ret != 0 ) {
    243                 ring_answer(ring, in->fd, E400);
    244                 goto NEXT;
    245         }
    246 
    247         it += 4;
    248         ret = memcmp(it, "/plaintext", 10);
    249         if( ret != 0 ) {
    250                 ring_answer(ring, in->fd, E404);
    251                 goto NEXT;
    252         }
    253 
    254         ring_answer(ring, in->fd, reply);
    255 
    256         NEXT:
    257                 ring_request(ring, in->fd);
    258                 return;
    259 }
    260 
    261 static void handle_answer(struct io_uring * ring, struct request_t * in, int res) {
    262         if( res < 0 ) {
    263                 int err = -res;
    264                 switch(err) {
    265                         case EPIPE:
    266                         case ECONNRESET:
    267                                 close(in->fd);
    268                                 free(in);
    269                                 return;
    270                         default:
    271                                 std::cerr << "answer error: (" << err << ") " << strerror(err) << std::endl;
    272                                 exit(EXIT_FAILURE);
    273                 }
    274         }
    275 
    276         if( res >= in->length ) {
    277                 free(in);
    278                 return;
    279         }
    280 
    281         struct io_uring_sqe * sqe = get_sqe(ring);
    282         io_uring_prep_write(sqe, in->fd, in->buff + res, in->length - res, 0);
    283         io_uring_sqe_set_data(sqe, in);
    284         submit(ring);
    285         // std::cout << "Re-Submitted request: " << in << " (" << (void*)in->buffer << ")"<<std::endl;
    286 
    287         ring_request(ring, in->fd);
    288 }
     246                        }
     247                        std::cerr << "accept error: (" << errno << ") " << strerror(errno) << std::endl;
     248                        exit(EXIT_FAILURE);
     249                }
     250
     251                // Count the connections
     252                ::stats.completions.conns++;
     253                ::stats.conns.current++;
     254                if(::stats.conns.current > ::stats.conns.max) {
     255                        ::stats.conns.max = ::stats.conns.current;
     256                }
     257
     258                // Read on the data
     259                fd = ret;
     260                request(ring);
     261
     262                // check the max fd so we know if we exceeded the array
     263                for(;;) {
     264                        int expected = max_fd;
     265                        if(expected >= fd) return;
     266                        if( __atomic_compare_exchange_n(&max_fd, &expected, fd, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) return;
     267                }
     268
     269                // check if we have enough space to fit inside the array
     270                if(fd >= array_max) {
     271                        std::cerr << "accept error: fd " << fd << " is too high" << std::endl;
     272                        return;
     273                }
     274
     275                // Put our connection into the global array
     276                // No one else should be using it so if they are that's a bug
     277                auto exist = __atomic_exchange_n( &conns[fd], this, __ATOMIC_SEQ_CST);
     278                if( exist ) {
     279                        size_t first = __atomic_fetch_add(&global_stats.recycle_errors, 1, __ATOMIC_SEQ_CST);
     280                        if( first == 0 ) {
     281                                std::cerr << "First: accept has existing connection " << std::endl;
     282                        }
     283                }
     284        }
     285
     286        // Handle a new request: the result of getting a cqe while in the REQUESTING state
     287        void newrequest(struct io_uring * ring, int res) {
     288                // Check errors
     289                if( res < 0 ) {
     290                        int err = -res;
     291                        switch(err) {
     292                                case EPIPE:
     293                                        ::stats.errors.requests.pipes++;
     294                                        break;
     295                                        // Don't fall through, to get better stats
     296                                case ECONNRESET:
     297                                        ::stats.errors.requests.reset++;
     298                                        break;
     299                                default:
     300                                        ::stats.errors.requests.other++;
     301                                        std::cerr << "request error: (" << err << ") " << strerror(err) << std::endl;
     302                                        exit(EXIT_FAILURE);
     303                        }
     304
     305                        // Connection failed, close it
     306                        this->close(err);
     307                        return;
     308                }
     309
     310                // Update stats
     311                ::stats.completions.reads++;
     312
     313                // Is this an EOF
     314                if(res == 0) {
     315                        // Yes, close the connection
     316                        this->close(0);
     317                        return;
     318                }
     319
     320                // Find the end of the request header
     321                const char * it = buffer;
     322                if( !strstr( it, "\r\n\r\n" ) ) {
     323                        // This state machine doesn't support incomplete reads
     324                        // Print them to output so it's clear there is an issue
     325                        std::cout << "Incomplete request" << std::endl;
     326                        this->close(EBADR);
     327                        return;
     328                }
     329
     330                // Find the method to use
     331                it = buffer;
     332                int ret = memcmp(it, "GET ", 4);
     333                if( ret != 0 ) {
     334                        // We only support get, answer with an error
     335                        answer(ring, E400);
     336                        return;
     337                }
     338
     339                // Find the target
     340                it += 4;
     341                ret = memcmp(it, "/plaintext", 10);
     342                if( ret != 0 ) {
     343                        // We only support /plaintext, answer with an error
     344                        answer(ring, E404);
     345                        return;
     346                }
     347
     348                // Correct request, answer with the payload
     349                this->stats.requests++;
     350                answer(ring, OK200);
     351        }
     352
     353        // Handle a partial or full answer sent: the result of getting a cqe while in the ANSWERING state
     354        void writedone(struct io_uring * ring, int res) {
     355                // Check errors
     356                if( res < 0 ) {
     357                        int err = -res;
     358                        switch(err) {
     359                                case EPIPE:
     360                                        ::stats.errors.answers.pipes++;
     361                                        break;
     362                                        // Don't fall through, to get better stats
     363                                case ECONNRESET:
     364                                        ::stats.errors.answers.reset++;
     365                                        break;
     366                                default:
     367                                        ::stats.errors.answers.other++;
     368                                        std::cerr << "answer error: (" << err << ") " << strerror(err) << std::endl;
     369                                        exit(EXIT_FAILURE);
     370                        }
     371
     372                        this->close(err);
     373                        return;
     374                }
     375
     376                // Update stats
     377                ::stats.completions.writes++;
     378                if(res == 124) ::stats.completions.full_writes++;
     379
     380                // Is this write completed
     381                if( res == to_send ) {
     382                        // Yes, more stats
     383                        this->stats.answers++;
     384                        if(this->stats.answers == 1) ::stats.conns.used++;
     385                        // Then read a new request
     386                        request(ring);
     387                        return;
     388                }
     389
     390                // Not a completed write, push the rest
     391                to_send -= res;
     392                iterator += res;
     393                answer(ring);
     394        }
     395public:
     396        // Submit a call to accept and create a new connection object
     397        static void accept(struct io_uring * ring, const struct options_t & opt) {
     398                struct io_uring_sqe * sqe = get_sqe(ring);
     399                io_uring_prep_accept(sqe, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
     400                submit(ring, sqe, new connection());
     401                // std::cout << "Submitted accept: " << req << std::endl;
     402        }
     403
     404        // Handle a new cqe
     405        void handle(struct io_uring * ring, int res, const struct options_t & opt) {
     406                switch(state) {
     407                case ACCEPTING:
     408                        connection::accept(ring, opt);
     409                        newconn(ring, res);
     410                        break;
     411                case REQUESTING:
     412                        newrequest(ring, res);
     413                        break;
     414                case ANSWERING:
     415                        writedone(ring, res);
     416                        break;
     417                }
     418        }
     419};
    289420
    290421//=========================================================
    291 extern "C" {
    292 extern int __io_uring_flush_sq(struct io_uring *ring);
    293 }
    294 
     422// Main loop of the WebServer
     423// Effectively uses one thread_local copy of everything per kernel thread
    295424void * proc_loop(void * arg) {
    296         size_t count = 0;
     425        // Get the thread local argument
    297426        struct options_t & opt = *(struct options_t *)arg;
    298 
    299427        struct io_uring * ring = opt.ring;
    300428
     429        // Track the shutdown using a event_fd
    301430        char endfd_buf[8];
    302431        ring_end(ring, opt.endfd, endfd_buf, 8);
    303432
    304         ring_accept(ring, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
    305 
    306         bool done = false;
     433        // Accept our first connection
     434        // May not take effect until io_uring_submit_and_wait
     435        connection::accept(ring, opt);
     436
     437        int reset = 1;       // Counter to print stats once in a while
     438        bool done = false;   // Are we done
     439        size_t sqes = 0;     // Number of sqes we submitted
     440        size_t call = 0;     // Number of submits we made
    307441        while(!done) {
    308                 struct io_uring_cqe *cqe;
    309                 int ret;
    310                 while(-EAGAIN == (ret = io_uring_wait_cqe_nr(ring, &cqe, 0))) {
    311                         ret = io_uring_submit_and_wait(ring, 1);
    312                         if (ret < 0) {
    313                                 fprintf( stderr, "io_uring get error: (%d) %s\n", (int)-ret, strerror(-ret) );
    314                                 exit(EXIT_FAILURE);
    315                         }
    316                         opt.result.subs += ret;
    317                         opt.result.cnts++;
    318                 }
    319 
    320                 if (ret < 0 && -EAGAIN != ret) {
    321                         fprintf( stderr, "io_uring peek error: (%d) %s\n", (int)-ret, strerror(-ret) );
     442                // Submit all the answers we have and wait for responses
     443                int ret = io_uring_submit_and_wait(ring, 1);
     444
     445                // check errors
     446                if (ret < 0) {
     447                        fprintf( stderr, "io_uring S&W error: (%d) %s\n", (int)-ret, strerror(-ret) );
    322448                        exit(EXIT_FAILURE);
    323449                }
    324450
    325                 auto req = (struct request_t *)cqe->user_data;
    326                 // std::cout << req << " completed with " << cqe->res << std::endl;
    327 
    328                 switch(req->type) {
    329                         case EVENT_END:
     451                // Check how good we are at batching sqes
     452                sqes += ret;
     453                call++;
     454
     455                struct io_uring_cqe *cqe;
     456                unsigned head;
     457                unsigned count = 0;
     458
     459                // go through all cqes
     460                io_uring_for_each_cqe(ring, head, cqe) {
     461                        if (0 == cqe->user_data) {
    330462                                done = true;
    331463                                break;
    332                         case EVENT_ACCEPT:
    333                                 handle_new_conn(ring, cqe->res);
    334                                 free(req);
    335                                 ring_accept(ring, opt.acpt.sockfd, opt.acpt.addr, opt.acpt.addrlen, opt.acpt.flags);
    336                                 break;
    337                         case EVENT_REQUEST:
    338                                 handle_request(ring, req, cqe->res);
    339                                 break;
    340                         case EVENT_ANSWER:
    341                                 handle_answer(ring, req, cqe->res);
    342                                 break;
    343                 }
    344 
    345                 io_uring_cqe_seen(ring, cqe);
    346         }
    347 
    348         return (void*)count;
     464                        }
     465
     466                        auto req = (class connection *)cqe->user_data;
     467                        req->handle( ring, cqe->res, opt );
     468
     469                        // Every now and then, print some stats
     470                        reset--;
     471                        if(reset == 0) {
     472                                std::cout << "Submit average: " << sqes << "/" << call << "(" << (((double)sqes) / call) << ")" << std::endl;
     473                                // Reset to some random number of completions
     474                                // use the ring_fd in the number of threads don't all print at once
     475                                reset = 100000 + (100000 * (ring->ring_fd % 5));
     476                        }
     477
     478                        // Keep track of how many cqes we have seen
     479                        count++;
     480                }
     481
     482                // Mark the cqes as seen
     483                io_uring_cq_advance(ring, count);
     484        }
     485
     486        // Tally all the thread local statistics
     487        __atomic_fetch_add( &global_stats.completions.conns, ::stats.completions.conns, __ATOMIC_SEQ_CST );
     488        __atomic_fetch_add( &global_stats.completions.reads, ::stats.completions.reads, __ATOMIC_SEQ_CST );
     489        __atomic_fetch_add( &global_stats.completions.writes, ::stats.completions.writes, __ATOMIC_SEQ_CST );
     490        __atomic_fetch_add( &global_stats.completions.full_writes, ::stats.completions.full_writes, __ATOMIC_SEQ_CST );
     491        __atomic_fetch_add( &global_stats.errors.conns, ::stats.errors.conns, __ATOMIC_SEQ_CST );
     492        __atomic_fetch_add( &global_stats.errors.requests.pipes, ::stats.errors.requests.pipes, __ATOMIC_SEQ_CST );
     493        __atomic_fetch_add( &global_stats.errors.requests.reset, ::stats.errors.requests.reset, __ATOMIC_SEQ_CST );
     494        __atomic_fetch_add( &global_stats.errors.requests.other, ::stats.errors.requests.other, __ATOMIC_SEQ_CST );
     495        __atomic_fetch_add( &global_stats.errors.answers.pipes, ::stats.errors.answers.pipes, __ATOMIC_SEQ_CST );
     496        __atomic_fetch_add( &global_stats.errors.answers.reset, ::stats.errors.answers.reset, __ATOMIC_SEQ_CST );
     497        __atomic_fetch_add( &global_stats.errors.answers.other, ::stats.errors.answers.other, __ATOMIC_SEQ_CST );
     498        __atomic_fetch_add( &global_stats.conns.current, ::stats.conns.current, __ATOMIC_SEQ_CST );
     499        __atomic_fetch_add( &global_stats.conns.max, ::stats.conns.max, __ATOMIC_SEQ_CST );
     500        __atomic_fetch_add( &global_stats.conns.used, ::stats.conns.used, __ATOMIC_SEQ_CST );
     501
     502        return nullptr;
    349503}
    350504
    351505//=========================================================
    352 struct __attribute__((aligned(128))) aligned_ring {
    353         struct io_uring storage;
    354 };
    355 
    356 #include <bit>
    357 
    358 #include <pthread.h>
     506#include <bit> // for ispow2
     507
    359508extern "C" {
    360         #include <signal.h>
    361         #include <sys/eventfd.h>
    362         #include <sys/socket.h>
    363         #include <netinet/in.h>
     509        #include <pthread.h>      // for pthreads
     510        #include <signal.h>       // for signal(SIGPIPE, SIG_IGN);
     511        #include <sys/eventfd.h>  // use for termination
     512        #include <sys/socket.h>   // for sockets in general
     513        #include <netinet/in.h>   // for sockaddr_in, AF_INET
    364514}
    365515
    366516int main(int argc, char * argv[]) {
     517        // Initialize the array of connection-fd associations
     518        for(int i = 0; i < array_max; i++) {
     519                conns[i] = nullptr;
     520        }
     521
     522        // Make sure we ignore all sigpipes
    367523        signal(SIGPIPE, SIG_IGN);
    368524
    369         unsigned nthreads = 1;
    370         unsigned port = 8800;
    371         unsigned entries = 256;
    372         unsigned backlog = 10;
    373         bool attach = false;
     525        // Default command line arguments
     526        unsigned nthreads = 1;      // number of kernel threads
     527        unsigned port = 8800;       // which port to listen on
     528        unsigned entries = 256;     // number of entries per ring/kernel thread
     529        unsigned backlog = 262144;  // backlog argument to listen
     530        bool attach = false;        // Whether or not to attach all the rings
     531        bool sqpoll = false;        // Whether or not to use SQ Polling
    374532
    375533        //===================
    376         // Arguments
     534        // Arguments Parsing
    377535        int c;
    378         while ((c = getopt (argc, argv, "t:p:e:b:a")) != -1) {
     536        while ((c = getopt (argc, argv, "t:p:e:b:aS")) != -1) {
    379537                switch (c)
    380538                {
     
    394552                        attach = true;
    395553                        break;
     554                case 'S':
     555                        sqpoll = true;
     556                        break;
    396557                case '?':
    397558                default:
    398                         std::cerr << "Usage: -t <threads> -p <port> -e <entries> -b <backlog> -a" << std::endl;
     559                        std::cerr << "Usage: -t <threads> -p <port> -e <entries> -b <backlog> -aS" << std::endl;
    399560                        return EXIT_FAILURE;
    400561                }
     
    416577        //===================
    417578        // End FD
     579        // Create a single event fd to notify the kernel threads when the server shuts down
    418580        int efd = eventfd(0, EFD_SEMAPHORE);
    419581        if (efd < 0) {
     
    424586        //===================
    425587        // Open Socket
     588        // Listen on specified port
    426589        std::cout << getpid() << " : Listening on port " << port << std::endl;
    427590        int server_fd = socket(AF_INET, SOCK_STREAM, 0);
     
    439602        address.sin_port = htons( port );
    440603
     604        // In case the port is already in use, don't just return an error
     605        // Linux is very slow at reclaiming ports so just retry regularly
    441606        int waited = 0;
    442607        while(true) {
     
    444609                if(ret < 0) {
    445610                        if(errno == EADDRINUSE) {
     611                                // Port is in use, let's retry later
    446612                                if(waited == 0) {
    447613                                        std::cerr << "Waiting for port" << std::endl;
    448614                                } else {
     615                                        // To be cure, print how long we have been waiting
    449616                                        std::cerr << "\r" << waited;
    450617                                        std::cerr.flush();
    451618                                }
    452619                                waited ++;
    453                                 usleep( 1000000 );
     620                                usleep( 1000000 ); // Wait and retry
    454621                                continue;
    455622                        }
     623                        // Some other error occurred, this is a real error
    456624                        std::cerr << "bind error: (" << errno << ") " << strerror(errno) << std::endl;
    457625                        exit(EXIT_FAILURE);
     
    474642        std::cout << std::endl;
    475643
     644        // Create the desired number of kernel-threads and for each
     645        // create a ring. Create the rings in the main so we can attach them
     646        // Since the rings are all in a dense VLA, align them so we don't get false sharing
     647        // it's unlikely but better safe than sorry
     648        struct __attribute__((aligned(128))) aligned_ring {
     649                struct io_uring storage;
     650        };
    476651        aligned_ring thrd_rings[nthreads];
    477652        pthread_t    thrd_hdls[nthreads];
    478653        options_t    thrd_opts[nthreads];
     654        bool no_drops  = true;
     655        bool fast_poll = true;
     656        bool nfix_sqpl = true;
    479657        for(unsigned i = 0; i < nthreads; i++) {
    480                 if(!attach || i == 0) {
    481                         io_uring_queue_init(entries, &thrd_rings[i].storage, 0);
    482                 }
    483                 else {
    484                         struct io_uring_params p;
    485                         memset(&p, 0, sizeof(p));
    486                         p.flags = IORING_SETUP_ATTACH_WQ;
     658                struct io_uring_params p = { };
     659
     660                if(sqpoll) { // If sqpoll is on, add the flag
     661                        p.flags |= IORING_SETUP_SQPOLL;
     662                        p.sq_thread_idle = 100;
     663                }
     664
     665                if (attach && i != 0) { // If attach is on, add the flag, except for the first ring
     666                        p.flags |= IORING_SETUP_ATTACH_WQ;
    487667                        p.wq_fd = thrd_rings[0].storage.ring_fd;
    488                         io_uring_queue_init_params(entries, &thrd_rings[i].storage, &p);
    489                 }
    490 
     668                }
     669
     670                // Create the ring
     671                io_uring_queue_init_params(entries, &thrd_rings[i].storage, &p);
     672
     673                // Check if some of the note-worthy features are there
     674                if(0 == (p.features & IORING_FEAT_NODROP         )) { no_drops  = false; }
     675                if(0 == (p.features & IORING_FEAT_FAST_POLL      )) { fast_poll = false; }
     676                if(0 == (p.features & IORING_FEAT_SQPOLL_NONFIXED)) { nfix_sqpl = false; }
     677
     678                // Write the socket options we want to the options we pass to the threads
    491679                thrd_opts[i].acpt.sockfd  = server_fd;
    492680                thrd_opts[i].acpt.addr    = (struct sockaddr *)&address;
     
    502690                }
    503691        }
     692
     693        // Tell the user if the features are present
     694        if( no_drops ) std::cout << "No Drop Present" << std::endl;
     695        if( fast_poll) std::cout << "Fast Poll Present" << std::endl;
     696        if(!nfix_sqpl) std::cout << "Non-Fixed SQ Poll not Present" << std::endl;
    504697
    505698        //===================
     
    510703                int ret;
    511704                do {
     705                        // Wait for a Ctrl-D to close the server
    512706                        ret = read(STDIN_FILENO, buffer, 128);
    513707                        if(ret < 0) {
     
    526720
    527721        //===================
     722        // Use eventfd_write to tell the threads we are closing
    528723        (std::cout << "Sending Shutdown to Threads... ").flush();
    529724        ret = eventfd_write(efd, nthreads);
     
    535730
    536731        //===================
     732        // Join all the threads and close the rings
    537733        (std::cout << "Stopping Threads Done... ").flush();
    538         size_t total = 0;
    539         size_t count = 0;
    540734        for(unsigned i = 0; i < nthreads; i++) {
    541735                void * retval;
     
    545739                        exit(EXIT_FAILURE);
    546740                }
    547                 // total += (size_t)retval;
    548                 total += thrd_opts[i].result.subs;
    549                 count += thrd_opts[i].result.cnts;
    550741
    551742                io_uring_queue_exit(thrd_opts[i].ring);
    552743        }
    553744        std::cout << "done" << std::endl;
    554         std::cout << "Submit average: " << total << "/" << count << "(" << (((double)total) / count) << ")" << std::endl;
    555745
    556746        //===================
     747        // Close the sockets
    557748        (std::cout << "Closing Socket... ").flush();
    558749        ret = shutdown( server_fd, SHUT_RD );
     
    567758                exit(EXIT_FAILURE);
    568759        }
    569         std::cout << "done" << std::endl;
     760        std::cout << "done" << std::endl << std::endl;
     761
     762        // Print stats and exit
     763        std::cout << "Errors: " << global_stats.errors.conns << "c, (" << global_stats.errors.requests.pipes << "p, " << global_stats.errors.requests.reset << "r, " << global_stats.errors.requests.other << "o" << ")r, (" << global_stats.errors.answers.pipes << "p, " << global_stats.errors.answers.reset << "r, " << global_stats.errors.answers.other << "o" << ")a" << std::endl;
     764        std::cout << "Completions: " << global_stats.completions.conns << "c, " << global_stats.completions.reads << "r, " << global_stats.completions.writes << "w" << std::endl;
     765        std::cout << "Full Writes: " << global_stats.completions.full_writes << std::endl;
     766        std::cout << "Max FD: " << max_fd << std::endl;
     767        std::cout << "Successful connections: " << global_stats.conns.used << std::endl;
     768        std::cout << "Max concurrent connections: " << global_stats.conns.max << std::endl;
     769        std::cout << "Accepts on non-zeros: " << global_stats.recycle_errors << std::endl;
     770        std::cout << "Leaked conn objects: " << global_stats.conns.current << std::endl;
    570771}
     772
     773// compile-command: "g++ http_ring.cpp -std=c++2a -pthread -luring -O3" //
  • doc/LaTeXmacros/common.tex

    rda3963a r565acf59  
    1111%% Created On       : Sat Apr  9 10:06:17 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Sat Jan 23 09:06:39 2021
    14 %% Update Count     : 491
     13%% Last Modified On : Mon Feb  8 21:45:41 2021
     14%% Update Count     : 522
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1616
     
    3232\setlist[enumerate]{listparindent=\parindent}% global
    3333\setlist[enumerate,2]{leftmargin=\parindent,labelsep=*,align=parleft,label=\alph*.}% local
    34 \setlist[description]{itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
     34\setlist[description]{topsep=0.5ex,itemsep=0pt,listparindent=\parindent,leftmargin=\parindent,labelsep=1.5ex}
    3535
    3636% Names used in the document.
    3737
    3838\usepackage{xspace}
    39 \newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}\xspace} % Cforall symbolic name
    40 \newcommand{\CFA}{\protect\CFAIcon}             % safe for section/caption
    41 \newcommand{\CFL}{\textrm{Cforall}\xspace} % Cforall symbolic name
    42 \newcommand{\Celeven}{\textrm{C11}\xspace} % C11 symbolic name
    43 \newcommand{\CC}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}\xspace} % C++ symbolic name
    44 \newcommand{\CCeleven}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}11\xspace} % C++11 symbolic name
    45 \newcommand{\CCfourteen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}14\xspace} % C++14 symbolic name
    46 \newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name
    47 \newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name
     39\newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}} % Cforall icon
     40\newcommand{\CFA}{\protect\CFAIcon\xspace}                      % CFA symbolic name
     41\newcommand{\CFL}{\textrm{Cforall}\xspace}                      % Cforall non-icon name
     42\newcommand{\Celeven}{\textrm{C11}\xspace}                      % C11 symbolic name
     43\newcommand{\CCIcon}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}} % C++ icon
     44\newcommand{\CC}{\protect\CCIcon\xspace}                        % C++ symbolic name
     45% numbers disallowed in LaTeX variable names => use number names
     46\newcommand{\CCeleven}{\protect\CCIcon{11}\xspace}      % C++11 symbolic name
     47\newcommand{\CCfourteen}{\protect\CCIcon{14}\xspace} % C++14 symbolic name
     48\newcommand{\CCseventeen}{\protect\CCIcon{17}\xspace} % C++17 symbolic name
     49\newcommand{\CCtwenty}{\protect\CCIcon{20}\xspace}      % C++20 symbolic name
    4850\newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
    4951
    5052%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    5153
     54% remove special-character warning in PDF side-bar names
    5255\makeatletter
     56\@ifpackageloaded{hyperref}{
     57  \pdfstringdefDisableCommands{
     58  \def\CFA{\CFL}
     59  \def\Celeven{C11\xspace}
     60  \def\CC{C++\xspace}
     61  \def\CCeleven{C++11\xspace}
     62  \def\CCfourteen{C++14\xspace}
     63  \def\CCseventeen{C++17\xspace}
     64  \def\CCtwenty{C++20\xspace}
     65  \def\Csharp{C\#\xspace}
     66  \def\lstinline{\xspace}% must use {} as delimiters, e.g., \lstinline{...}
     67  }{}
     68}
     69
    5370% parindent is relative, i.e., toggled on/off in environments like itemize, so store the value for
    5471% use rather than use \parindent directly.
     
    8198    \vskip 50\p@
    8299  }}
    83 \renewcommand\section{\@startsection{section}{1}{\z@}{-3.5ex \@plus -1ex \@minus -.2ex}{1.75ex \@plus .2ex}{\normalfont\large\bfseries}}
    84 \renewcommand\subsection{\@startsection{subsection}{2}{\z@}{-3.25ex \@plus -1ex \@minus -.2ex}{1.5ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
     100\renewcommand\section{\@startsection{section}{1}{\z@}{-3.0ex \@plus -1ex \@minus -.2ex}{1.5ex \@plus .2ex}{\normalfont\large\bfseries}}
     101\renewcommand\subsection{\@startsection{subsection}{2}{\z@}{-2.75ex \@plus -1ex \@minus -.2ex}{1.25ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
    85102\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex \@plus -1ex \@minus -.2ex}{1.0ex \@plus .2ex}{\normalfont\normalsize\bfseries}}
    86103\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-2.0ex \@plus -1ex \@minus -.2ex}{-1em}{\normalfont\normalsize\bfseries}}
     
    89106\newcommand{\italic}[1]{\emph{\hyperpage{#1}}}
    90107\newcommand{\Definition}[1]{\textbf{\hyperpage{#1}}}
    91 \newcommand{\see}[1]{\emph{see}~#1}
     108\newcommand{\see}[1]{(see #1)}
    92109
    93110% Define some commands that produce formatted index entries suitable for cross-references.
     
    235252\newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}
    236253\newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
     254\newcommand{\LstStringStyle}[1]{{\lst@basicstyle{\lst@stringstyle{#1}}}}
    237255
    238256\newlength{\gcolumnposn}                                % temporary hack because lstlisting does not handle tabs correctly
     
    260278xleftmargin=\parindentlnth,                             % indent code to paragraph indentation
    261279extendedchars=true,                                             % allow ASCII characters in the range 128-255
    262 escapechar=§,                                                   % LaTeX escape in CFA code §...§ (section symbol), emacs: C-q M-'
    263 mathescape=true,                                                % LaTeX math escape in CFA code $...$
     280escapechar=\$,                                                  % LaTeX escape in CFA code $...$ (dollar sign)
     281mathescape=false,                                               % LaTeX math escape in CFA code $...$
    264282keepspaces=true,                                                %
    265283showstringspaces=false,                                 % do not show spaces with cup
    266284showlines=true,                                                 % show blank lines at end of code
    267285aboveskip=4pt,                                                  % spacing above/below code block
    268 belowskip=-2pt,
     286belowskip=0pt,
    269287numberstyle=\footnotesize\sf,                   % numbering style
    270288% replace/adjust listing characters that look bad in sanserif
     
    279297\lstset{
    280298language=CFA,
    281 moredelim=**[is][\color{red}]{®}{®},    % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
    282 moredelim=**[is][\color{blue}]{ß}{ß},   % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
    283 moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
    284 moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
    285 % replace/adjust listing characters that look bad in sanserif
    286 add to literate={`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
     299moredelim=**[is][\color{red}]{@}{@},    % red highlighting @...@
     300%moredelim=**[is][\color{red}]{®}{®},   % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
     301%moredelim=**[is][\color{blue}]{ß}{ß},  % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
     302%moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
     303%moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
    287304}% lstset
    288305\lstset{#1}
  • doc/bibliography/pl.bib

    rda3963a r565acf59  
    17971797}
    17981798
    1799 @article{Delisle19,
     1799@article{Delisle20,
    18001800    keywords    = {concurrency, Cforall},
    18011801    contributer = {pabuhr@plg},
    18021802    author      = {Thierry Delisle and Peter A. Buhr},
    18031803    title       = {Advanced Control-flow and Concurrency in \textsf{C}$\mathbf{\forall}$},
    1804     year        = 2019,
     1804    year        = 2020,
    18051805    journal     = spe,
    1806     pages       = {1-33},
    1807     note        = {submitted},
     1806    pages       = {1-38},
     1807    note        = {\href{https://doi-org.proxy.lib.uwaterloo.ca/10.1002/spe.2925}{https://\-doi-org.proxy.lib.uwaterloo.ca/\-10.1002/\-spe.2925}},
     1808    note        = {},
    18081809}
    18091810
  • doc/theses/andrew_beach_MMath/existing.tex

    rda3963a r565acf59  
    1 \chapter{\texorpdfstring{\CFA Existing Features}{Cforall Existing Features}}
     1\chapter{\CFA Existing Features}
    22
    33\CFA (C-for-all)~\cite{Cforall} is an open-source project extending ISO C with
     
    1212obvious to the reader.
    1313
    14 \section{\texorpdfstring{Overloading and \lstinline|extern|}{Overloading and extern}}
     14\section{Overloading and \lstinline{extern}}
    1515\CFA has extensive overloading, allowing multiple definitions of the same name
    1616to be defined.~\cite{Moss18}
     
    4242
    4343\section{Reference Type}
    44 \CFA adds a rebindable reference type to C, but more expressive than the \CC
     44\CFA adds a rebindable reference type to C, but more expressive than the \Cpp
    4545reference.  Multi-level references are allowed and act like auto-dereferenced
    4646pointers using the ampersand (@&@) instead of the pointer asterisk (@*@). \CFA
     
    5959
    6060Both constructors and destructors are operators, which means they are just
    61 functions with special operator names rather than type names in \CC. The
     61functions with special operator names rather than type names in \Cpp. The
    6262special operator names may be used to call the functions explicitly (not
    63 allowed in \CC for constructors).
     63allowed in \Cpp for constructors).
    6464
    6565In general, operator names in \CFA are constructed by bracketing an operator
     
    8888matching overloaded destructor @void ^?{}(T &);@ is called.  Without explicit
    8989definition, \CFA creates a default and copy constructor, destructor and
    90 assignment (like \CC). It is possible to define constructors/destructors for
     90assignment (like \Cpp). It is possible to define constructors/destructors for
    9191basic and existing types.
    9292
     
    9494\CFA uses parametric polymorphism to create functions and types that are
    9595defined over multiple types. \CFA polymorphic declarations serve the same role
    96 as \CC templates or Java generics. The ``parametric'' means the polymorphism is
     96as \Cpp templates or Java generics. The ``parametric'' means the polymorphism is
    9797accomplished by passing argument operations to associate \emph{parameters} at
    9898the call site, and these parameters are used in the function to differentiate
     
    134134
    135135Note, a function named @do_once@ is not required in the scope of @do_twice@ to
    136 compile it, unlike \CC template expansion. Furthermore, call-site inferencing
     136compile it, unlike \Cpp template expansion. Furthermore, call-site inferencing
    137137allows local replacement of the most specific parametric functions needs for a
    138138call.
     
    178178}
    179179\end{cfa}
    180 The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \CC
     180The generic type @node(T)@ is an example of a polymorphic-type usage.  Like \Cpp
    181181templates usage, a polymorphic-type usage must specify a type parameter.
    182182
  • doc/theses/andrew_beach_MMath/features.tex

    rda3963a r565acf59  
    55
    66\section{Virtuals}
     7Virtual types and casts are not part of the exception system nor are they
     8required for an exception system. But an object-oriented style hierarchy is a
     9great way of organizing exceptions so a minimal virtual system has been added
     10to \CFA.
     11
     12The pattern of a simple hierarchy, borrowed from object-oriented
     13programming, was chosen for several reasons.
     14The first is that it allows new exceptions to be added in user code
     15and in libraries independently of each other. Another is it allows for
     16different levels of exception grouping (all exceptions, all IO exceptions or
     17a particular IO exception). It also provides a simple way of passing
     18data back and forth across the throw.
     19
    720Virtual types and casts are not required for a basic exception-system but are
    821useful for advanced exception features. However, \CFA is not object-oriented so
    9 there is no obvious concept of virtuals.  Hence, to create advanced exception
    10 features for this work, I needed to designed and implemented a virtual-like
     22there is no obvious concept of virtuals. Hence, to create advanced exception
     23features for this work, I needed to design and implement a virtual-like
    1124system for \CFA.
    1225
     26% NOTE: Maybe we should put less of the rationale here.
    1327Object-oriented languages often organize exceptions into a simple hierarchy,
    1428\eg Java.
     
    3044\end{center}
    3145The hierarchy provides the ability to handle an exception at different degrees
    32 of specificity (left to right).  Hence, it is possible to catch a more general
     46of specificity (left to right). Hence, it is possible to catch a more general
    3347exception-type in higher-level code where the implementation details are
    3448unknown, which reduces tight coupling to the lower-level implementation.
     
    6175While much of the virtual infrastructure is created, it is currently only used
    6276internally for exception handling. The only user-level feature is the virtual
    63 cast, which is the same as the \CC \lstinline[language=C++]|dynamic_cast|.
     77cast, which is the same as the \Cpp \lstinline[language=C++]|dynamic_cast|.
     78\label{p:VirtualCast}
    6479\begin{cfa}
    6580(virtual TYPE)EXPRESSION
    6681\end{cfa}
    67 Note, the syntax and semantics matches a C-cast, rather than the unusual \CC
    68 syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be a
    69 pointer to a virtual type. The cast dynamically checks if the @EXPRESSION@ type
    70 is the same or a subtype of @TYPE@, and if true, returns a pointer to the
     82Note, the syntax and semantics match a C-cast, rather than the function-like
     83\Cpp syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be
     84a pointer to a virtual type.
     85The cast dynamically checks if the @EXPRESSION@ type is the same or a subtype
     86of @TYPE@, and if true, returns a pointer to the
    7187@EXPRESSION@ object, otherwise it returns @0p@ (null pointer).
    7288
     
    7793
    7894Exceptions are defined by the trait system; there are a series of traits, and
    79 if a type satisfies them, then it can be used as an exception.  The following
     95if a type satisfies them, then it can be used as an exception. The following
    8096is the base trait all exceptions need to match.
    8197\begin{cfa}
    8298trait is_exception(exceptT &, virtualT &) {
    83         virtualT const & @get_exception_vtable@(exceptT *);
     99        virtualT const & get_exception_vtable(exceptT *);
    84100};
    85101\end{cfa}
    86 The function takes any pointer, including the null pointer, and returns a
    87 reference to the virtual-table object. Defining this function also establishes
    88 the virtual type and a virtual-table pair to the \CFA type-resolver and
    89 promises @exceptT@ is a virtual type and a child of the base exception-type.
    90 
    91 {\color{blue} PAB: I do not understand this paragraph.}
    92 One odd thing about @get_exception_vtable@ is that it should always be a
    93 constant function, returning the same value regardless of its argument.  A
    94 pointer or reference to the virtual table instance could be used instead,
    95 however using a function has some ease of implementation advantages and allows
    96 for easier disambiguation because the virtual type name (or the address of an
    97 instance that is in scope) can be used instead of the mangled virtual table
    98 name.  Also note the use of the word ``promise'' in the trait
    99 description. Currently, \CFA cannot check to see if either @exceptT@ or
    100 @virtualT@ match the layout requirements. This is considered part of
    101 @get_exception_vtable@'s correct implementation.
     102The trait is defined over two types, the exception type and the virtual table
     103type. This should be one-to-one: each exception type has only one virtual
     104table type and vice versa. The only assertion in the trait is
     105@get_exception_vtable@, which takes a pointer of the exception type and
     106returns a reference to the virtual table type instance.
     107
     108The function @get_exception_vtable@ is actually a constant function.
     109Regardless of the value passed in (including the null pointer) it should
     110return a reference to the virtual table instance for that type.
     111The reason it is a function instead of a constant is that it makes type
     112annotations easier to write as you can use the exception type instead of the
     113virtual table type; which usually has a mangled name.
     114% Also \CFA's trait system handles functions better than constants and doing
     115% it this way
     116
     117% I did have a note about how it is the programmer's responsibility to make
     118% sure the function is implemented correctly. But this is true of every
     119% similar system I know of (except Agda's I guess) so I took it out.
    102120
    103121\section{Raise}
    104 \CFA provides two kinds of exception raise: termination (see
    105 \VRef{s:Termination}) and resumption (see \VRef{s:Resumption}), which are
     122\CFA provides two kinds of exception raise: termination
     123\see{\VRef{s:Termination}} and resumption \see{\VRef{s:Resumption}}, which are
    106124specified with the following traits.
    107125\begin{cfa}
    108126trait is_termination_exception(
    109127                exceptT &, virtualT & | is_exception(exceptT, virtualT)) {
    110         void @defaultTerminationHandler@(exceptT &);
     128        void defaultTerminationHandler(exceptT &);
    111129};
    112130\end{cfa}
     
    118136trait is_resumption_exception(
    119137                exceptT &, virtualT & | is_exception(exceptT, virtualT)) {
    120         void @defaultResumptionHandler@(exceptT &);
     138        void defaultResumptionHandler(exceptT &);
    121139};
    122140\end{cfa}
     
    125143
    126144Finally there are three convenience macros for referring to these traits:
    127 @IS_EXCEPTION@, @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@.  Each
    128 takes the virtual type's name, and for polymorphic types only, the
    129 parenthesized list of polymorphic arguments. These macros do the name mangling
    130 to get the virtual-table name and provide the arguments to both sides
    131 {\color{blue}(PAB: What's a ``side''?)}
     145@IS_EXCEPTION@, @IS_TERMINATION_EXCEPTION@ and @IS_RESUMPTION_EXCEPTION@.
     146All three traits are hard to use while naming the virtual table as it has an
     147internal mangled name. These macros take the exception name as their first
     148argument and do the mangling. They all take a second argument for polymorphic
     149types which is the parenthesized list of polymorphic arguments. These
     150arguments are passed to both the exception type and the virtual table type as
     151the arguments do have to match.
     152
     153For example consider a function that is polymorphic over types that have a
     154defined arithmetic exception:
     155\begin{cfa}
     156forall(Num | IS_EXCEPTION(Arithmetic, (Num)))
     157void some_math_function(Num & left, Num & right);
     158\end{cfa}
    132159
    133160\subsection{Termination}
     
    146173throw EXPRESSION;
    147174\end{cfa}
    148 The expression must return a termination-exception reference, where the
    149 termination exception has a type with a @void defaultTerminationHandler(T &)@
    150 (default handler) defined. The handler is found at the call site using \CFA's
    151 trait system and passed into the exception system along with the exception
    152 itself.
    153 
    154 At runtime, a representation of the exception type and an instance of the
    155 exception type is copied into managed memory (heap) to ensure it remains in
     175The expression must return a reference to a termination exception, where the
     176termination exception is any type that satisfies @is_termination_exception@
     177at the call site.
     178Through \CFA's trait system the functions in the traits are passed into the
     179throw code. A new @defaultTerminationHandler@ can be defined in any scope to
     180change the throw's behavior (see below).
     181
     182At runtime, the exception returned by the expression
     183is copied into managed memory (heap) to ensure it remains in
    156184scope during unwinding. It is the user's responsibility to ensure the original
    157185exception object at the throw is freed when it goes out of scope. Being
     
    165193try {
    166194        GUARDED_BLOCK
    167 } @catch (EXCEPTION_TYPE$\(_1\)$ * NAME)@ { // termination handler 1
     195} catch (EXCEPTION_TYPE$\(_1\)$ * NAME$\(_1\)$) { // termination handler 1
    168196        HANDLER_BLOCK$\(_1\)$
    169 } @catch (EXCEPTION_TYPE$\(_2\)$ * NAME)@ { // termination handler 2
     197} catch (EXCEPTION_TYPE$\(_2\)$ * NAME$\(_2\)$) { // termination handler 2
    170198        HANDLER_BLOCK$\(_2\)$
    171199}
     
    178206Exception matching checks the representation of the thrown exception-type is
    179207the same or a descendant type of the exception types in the handler clauses. If
    180 there is a match, a pointer to the exception object created at the throw is
    181 bound to @NAME@ and the statements in the associated @HANDLER_BLOCK@ are
    182 executed. If control reaches the end of the handler, the exception is freed,
    183 and control continues after the try statement.
     208it is the same as or a descendant of @EXCEPTION_TYPE@$_i$ then @NAME@$_i$ is
     209bound to a pointer to the exception and the statements in @HANDLER_BLOCK@$_i$
     210are executed. If control reaches the end of the handler, the exception is
     211freed and control continues after the try statement.
    184212
    185213The default handler visible at the throw statement is used if no matching
    186214termination handler is found after the entire stack is searched. At that point,
    187215the default handler is called with a reference to the exception object
    188 generated at the throw. If the default handler returns, the system default
    189 action is executed, which often terminates the program. This feature allows
     216generated at the throw. If the default handler returns, control continues
     217from after the throw statement. This feature allows
    190218each exception type to define its own action, such as printing an informative
    191219error message, when an exception is not handled in the program.
     220However the default handler for all exception types triggers a cancellation
     221using the exception.
    192222
    193223\subsection{Resumption}
     
    196226Resumption raise, called ``resume'', is as old as termination
    197227raise~\cite{Goodenough75} but is less popular. In many ways, resumption is
    198 simpler and easier to understand, as it is simply a dynamic call (as in
    199 Lisp). The semantics of resumption is: search the stack for a matching handler,
     228simpler and easier to understand, as it is simply a dynamic call.
     229The semantics of resumption is: search the stack for a matching handler,
    200230execute the handler, and continue execution after the resume. Notice, the stack
    201231cannot be unwound because execution returns to the raise point. Resumption is
     
    209239\end{cfa}
    210240The semantics of the @throwResume@ statement are like the @throw@, but the
    211 expression has a type with a @void defaultResumptionHandler(T &)@ (default
    212 handler) defined, where the handler is found at the call site by the type
    213 system.  At runtime, a representation of the exception type and an instance of
    214 the exception type is \emph{not} copied because the stack is maintained during
    215 the handler search.
     241expression has to return a reference to a type that satisfies the trait
     242@is_resumption_exception@. Like with termination the exception system can
     243use these assertions while (throwing/raising/handling) the exception.
     244
     245At runtime, no copies are made. As the stack is not unwound the exception and
     246any values on the stack will remain in scope while the resumption is handled.
    216247
    217248Then the exception system searches the stack starting from the resume and
    218 proceeding towards the base of the stack, from callee to caller. At each stack
     249proceeding to the base of the stack, from callee to caller. At each stack
    219250frame, a check is made for resumption handlers defined by the @catchResume@
    220251clauses of a @try@ statement.
     
    222253try {
    223254        GUARDED_BLOCK
    224 } @catchResume (EXCEPTION_TYPE$\(_1\)$ * NAME)@ { // resumption handler 1
     255} catchResume (EXCEPTION_TYPE$\(_1\)$ * NAME$\(_1\)$) {
    225256        HANDLER_BLOCK$\(_1\)$
    226 } @catchResume (EXCEPTION_TYPE$\(_2\)$ * NAME)@ { // resumption handler 2
     257} catchResume (EXCEPTION_TYPE$\(_2\)$ * NAME$\(_2\)$) {
    227258        HANDLER_BLOCK$\(_2\)$
    228259}
     
    253284current point on the stack because new try statements may have been pushed by
    254285the handler or functions called from the handler. If there is no match back to
    255 the point of the current handler, the search skips the stack frames already
    256 searched by the first resume and continues after the try statement. The default
    257 handler always continues from default handler associated with the point where
    258 the exception is created.
     286the point of the current handler, the search skips\label{p:searchskip} the
     287stack frames already searched by the first resume and continues after
     288the try statement. The default handler always continues from default
     289handler associated with the point where the exception is created.
    259290
    260291% This might need a diagram. But it is an important part of the justification
     
    275306\end{verbatim}
    276307
    277 This resumption search-pattern reflect the one for termination, which matches
    278 with programmer expectations. However, it avoids the \emph{recursive
    279 resumption} problem. If parts of the stack are searched multiple times, loops
     308This resumption search pattern reflects the one for termination, and so
     309should come naturally to most programmers.
     310However, it avoids the \emph{recursive resumption} problem.
     311If parts of the stack are searched multiple times, loops
    280312can easily form resulting in infinite recursion.
    281313
     
    283315\begin{cfa}
    284316try {
    285         throwResume$\(_1\)$ (E &){};
    286 } catch( E * ) {
    287         throwResume;
    288 }
    289 \end{cfa}
    290 Based on termination semantics, programmer expectation is for the re-resume to
    291 continue searching the stack frames after the try statement. However, the
    292 current try statement is still on the stack below the handler issuing the
    293 reresume (see \VRef{s:Reraise}). Hence, the try statement catches the re-raise
    294 again and does another re-raise \emph{ad infinitum}, which is confusing and
    295 difficult to debug. The \CFA resumption search-pattern skips the try statement
    296 so the reresume search continues after the try, mathcing programmer
    297 expectation.
     317        throwResume (E &){}; // first
     318} catchResume(E *) {
     319        throwResume (E &){}; // second
     320}
     321\end{cfa}
     322If this handler is ever used it will be placed on top of the stack above the
     323try statement. If the stack was not masked then the @throwResume@ in the
     324handler would always be caught by the handler, leading to an infinite loop.
     325Masking avoids this problem and other more complex versions of it involving
     326multiple handlers and exception types.
     327
     328Other masking strategies could be used; such as masking the handlers that
     329have caught an exception. This one was chosen because it creates a symmetry
     330with termination (masked sections of the stack would be unwound with
     331termination) and having only one pattern to learn is easier.
    298332
    299333\section{Conditional Catch}
    300 Both termination and resumption handler-clauses may perform conditional matching:
     334Both termination and resumption handler clauses can be given an additional
     335condition to further control which exceptions they handle:
    301336\begin{cfa}
    302337catch (EXCEPTION_TYPE * NAME ; @CONDITION@)
     
    305340exception matches, @CONDITION@ is executed. The condition expression may
    306341reference all names in scope at the beginning of the try block and @NAME@
    307 introduced in the handler clause.  If the condition is true, then the handler
     342introduced in the handler clause. If the condition is true, then the handler
    308343matches. Otherwise, the exception search continues at the next appropriate kind
    309344of handler clause in the try block.
     
    322357
    323358\section{Reraise}
     359\color{red}{From Andrew: I recommend we talk about why the language doesn't
     360have rethrows/reraises instead.}
     361
    324362\label{s:Reraise}
    325363Within the handler block or functions called from the handler block, it is
     
    327365@throwResume@, respectively.
    328366\begin{cfa}
    329 catch( ... ) {
     367try {
     368        ...
     369} catch( ... ) {
    330370        ... throw; // rethrow
    331371} catchResume( ... ) {
     
    340380handler is generated that does a program-level abort.
    341381
    342 
    343382\section{Finally Clauses}
    344383A @finally@ clause may be placed at the end of a @try@ statement.
     
    346385try {
    347386        GUARDED_BLOCK
    348 } ...   // any number or kind of handler clauses
    349 } finally {
     387} ... // any number or kind of handler clauses
     388... finally {
    350389        FINALLY_BLOCK
    351390}
    352391\end{cfa}
    353 The @FINALLY_BLOCK@ is executed when the try statement is unwound from the
    354 stack, \ie when the @GUARDED_BLOCK@ or any handler clause finishes. Hence, the
    355 finally block is always executed.
     392The @FINALLY_BLOCK@ is executed when the try statement is removed from the
     393stack, including when the @GUARDED_BLOCK@ or any handler clause finishes or
     394during an unwind.
     395The only time the block is not executed is if the program is exited before
     396that happens.
    356397
    357398Execution of the finally block should always finish, meaning control runs off
    358399the end of the block. This requirement ensures control always continues as if the
    359400finally clause is not present, \ie finally is for cleanup not changing control
    360 flow.  Because of this requirement, local control flow out of the finally block
    361 is forbidden.  The compiler precludes any @break@, @continue@, @fallthru@ or
     401flow. Because of this requirement, local control flow out of the finally block
     402is forbidden. The compiler precludes any @break@, @continue@, @fallthru@ or
    362403@return@ that causes control to leave the finally block. Other ways to leave
    363404the finally block, such as a long jump or termination are much harder to check,
     
    369410possible forwards the cancellation exception to a different stack.
    370411
     412Cancellation is not an exception operation like termination or resumption.
    371413There is no special statement for starting a cancellation; instead the standard
    372 library function @cancel_stack@ is called passing an exception.  Unlike a
     414library function @cancel_stack@ is called passing an exception. Unlike a
    373415raise, this exception is not used in matching only to pass information about
    374416the cause of the cancellation.
     
    377419\begin{description}
    378420\item[Main Stack:]
    379 
    380421The main stack is the one used by the program main at the start of execution,
    381 and is the only stack in a sequential program.  Hence, when cancellation is
    382 forwarded to the main stack, there is no other forwarding stack, so after the
    383 stack is unwound, there is a program-level abort.
     422and is the only stack in a sequential program. Even in a concurrent program
     423the main stack is only dependent on the environment that started the program.
     424Hence, when the main stack is cancelled there is nowhere else in the program
     425to notify. After the stack is unwound, there is a program-level abort.
    384426
    385427\item[Thread Stack:]
    386428A thread stack is created for a @thread@ object or object that satisfies the
    387 @is_thread@ trait.  A thread only has two points of communication that must
     429@is_thread@ trait. A thread only has two points of communication that must
    388430happen: start and join. As the thread must be running to perform a
    389 cancellation, it must occur after start and before join, so join is a
    390 cancellation point.  After the stack is unwound, the thread halts and waits for
    391 another thread to join with it. The joining thread, checks for a cancellation,
     431cancellation, it must occur after start and before join, so join is used
     432for communication here.
     433After the stack is unwound, the thread halts and waits for
     434another thread to join with it. The joining thread checks for a cancellation,
    392435and if present, resumes exception @ThreadCancelled@.
    393436
     
    397440the exception is not caught. The implicit join does a program abort instead.
    398441
    399 This semantics is for safety. One difficult problem for any exception system is
    400 defining semantics when an exception is raised during an exception search:
    401 which exception has priority, the original or new exception? No matter which
    402 exception is selected, it is possible for the selected one to disrupt or
    403 destroy the context required for the other. {\color{blue} PAB: I do not
    404 understand the following sentences.} This loss of information can happen with
    405 join but as the thread destructor is always run when the stack is being unwound
    406 and one termination/cancellation is already active. Also since they are
    407 implicit they are easier to forget about.
     442This semantics is for safety. If an unwind is triggered while another unwind
     443is underway only one of them can proceed as they both want to ``consume'' the
     444stack. Letting both try to proceed leads to very undefined behaviour.
     445Both termination and cancellation involve unwinding and, since the default
     446@defaultResumptionHandler@ performs a termination that could more easily
     447happen in an implicit join inside a destructor. So there is an error message
     448and an abort instead.
     449
     450The recommended way to avoid the abort is to handle the initial resumption
     451from the implicit join. If required you may put an explicit join inside a
     452finally clause to disable the check and use the local
     453@defaultResumptionHandler@ instead.
    408454
    409455\item[Coroutine Stack:] A coroutine stack is created for a @coroutine@ object
    410 or object that satisfies the @is_coroutine@ trait.  A coroutine only knows of
    411 two other coroutines, its starter and its last resumer.  The last resumer has
    412 the tightest coupling to the coroutine it activated.  Hence, cancellation of
     456or object that satisfies the @is_coroutine@ trait. A coroutine only knows of
     457two other coroutines, its starter and its last resumer. The last resumer has
     458the tightest coupling to the coroutine it activated. Hence, cancellation of
    413459the active coroutine is forwarded to the last resumer after the stack is
    414460unwound, as the last resumer has the most precise knowledge about the current
  • doc/theses/andrew_beach_MMath/future.tex

    rda3963a r565acf59  
    11\chapter{Future Work}
    22
     3\section{Language Improvements}
     4\CFA is a developing programming language. As such, there are partially or
     5unimplemented features of the language (including several broken components)
     6that I had to work around while building an exception handling system largely in
     7the \CFA language (some C components).  The following are a few of these
     8issues, and once implemented/fixed, how this would affect the exception system.
     9\begin{itemize}
     10\item
     11The implementation of termination is not portable because it includes
     12hand-crafted assembly statements. These sections must be ported by hand to
     13support more hardware architectures, such as the ARM processor.
     14\item
     15Due to a type-system problem, the catch clause cannot bind the exception to a
     16reference instead of a pointer. Since \CFA has a very general reference
     17capability, programmers will want to use it. Once fixed, this capability should
     18result in little or no change in the exception system.
     19\item
     20Termination handlers cannot use local control-flow transfers, \eg by @break@,
     21@return@, \etc. The reason is that current code generation hoists a handler
     22into a nested function for convenience (versus assembly-code generation at the
     23@try@ statement). Hence, when the handler runs, its code is not in the lexical
     24scope of the @try@ statement, where the local control-flow transfers are
     25meaningful.
     26\item
     27There is no detection of colliding unwinds. It is possible for clean-up code
     28run during an unwind to trigger another unwind that escapes the clean-up code
     29itself; such as a termination exception caught further down the stack or a
     30cancellation. There do exist ways to handle this but currently they are not
     31even detected and the first unwind will simply be forgotten, often leaving
     32the program in a bad state.
     33\item
     34Also the exception system did not have a lot of time to be tried and tested.
     35So just letting people use the exception system more will reveal new
     36quality of life upgrades that can be made with time.
     37\end{itemize}
     38
    339\section{Complete Virtual System}
    4 The virtual system should be completed. It was never supposed to be part of
    5 this project and so minimal work was done on it. A draft of what the complete
    6 system might look like was created but it was never finalized or implemented.
    7 A future project in \CFA would be to complete that work and to update the
    8 parts of the exception system that use the current version.
     40The virtual system should be completed. It was not supposed to be part of this
     41project, but was thrust upon it to do exception inheritance; hence, only
     42minimal work was done. A draft for a complete virtual system is available but
     43it is not finalized.  A future \CFA project is to complete that work and then
     44update the exception system that uses the current version.
    945
    10 There are several improvements to the virtual system that would improve
    11 the exception traits. The biggest one is an assertion that checks that one
    12 virtual type is a child of another virtual type. This would capture many of
    13 the requirements much more precisely.
     46There are several improvements to the virtual system that would improve the
     47exception traits. The most important one is an assertion to check one virtual
     48type is a child of another. This check precisely captures many of the
     49correctness requirements.
    1450
    1551The full virtual system might also include other improvements like associated
    16 types. This is a proposed feature that would allow traits to refer to types
    17 not listed in their header. This would allow the exception traits to not
    18 refer to the virtual table type explicatly which would remove the need for
    19 the interface macros.
     52types to allow traits to refer to types not listed in their header. This
     53feature allows exception traits to not refer to the virtual-table type
     54explicitly, removing the need for the current interface macros.
    2055
    21 \section{Additional Throws}
    22 Several other kinds of throws, beyond the termination throw (@throw@),
    23 the resumption throw (@throwResume@) and the re-throws, were considered.
    24 None were as useful as the core throws but they would likely be worth
    25 revising.
     56\section{Additional Raises}
     57Several other kinds of exception raises were considered beyond termination
     58(@throw@), resumption (@throwResume@), and reraise.
    2659
    27 The first ones are throws for asynchronous exceptions, throwing exceptions
    28 from one stack to another. These act like signals allowing for communication
    29 between the stacks. This is usually used with resumption as it allows the
    30 target stack to continue execution normally after the exception has been
    31 handled.
     60The first is a non-local/concurrent raise providing asynchronous exceptions,
     61\ie raising an exception on another stack. This semantics acts like signals
     62allowing for out-of-band communication among coroutines and threads. This kind
     63of raise is often restricted to resumption to allow the target stack to
     64continue execution normally after the exception has been handled. That is,
     65allowing one coroutine/thread to unwind the stack of another via termination is
     66bad software engineering.
    3267
    33 This would much more coordination between the concurrency system and the
    34 exception system to handle. Most of the interesting design decisions around
    35 applying asynchronous exceptions appear to be around masking (controlling
    36 which exceptions may be thrown at a stack). It would likely require more of
    37 the virtual system and would also effect how default handlers are set.
     68Non-local/concurrent requires more coordination between the concurrency system
     69and the exception system. Many of the interesting design decisions centre
     70around masking (controlling which exceptions may be thrown at a stack). It
     71would likely require more of the virtual system and would also affect how
     72default handlers are set.
    3873
    39 The other throws were designed to mimic bidirectional algebraic effects.
    40 Algebraic effects are used in some functional languages and allow a function
     74Other raises were considered to mimic bidirectional algebraic effects.
     75Algebraic effects are used in some functional languages allowing one function
    4176to have another function on the stack resolve an effect (which is defined with
    42 a function-like interface).
    43 These can be mimiced with resumptions and the the new throws were designed
    44 to try and mimic bidirectional algebraic effects, where control can go back
    45 and forth between the function effect caller and handler while the effect
    46 is underway.
     77a functional-like interface).  This semantics can be mimicked with resumptions
     78and new raises were discussed to mimic bidirectional algebraic-effects, where
     79control can go back and forth between the function-effect caller and handler
     80while the effect is underway.
    4781% resume-top & resume-reply
     82These raises would be like the resumption raise except using different search
     83patterns to find the handler.
    4884
    49 These throws would likely be just like the resumption throw except they would
    50 use different search patterns to find the handler to reply to.
     85\section{Zero-Cost Try}
     86\CFA does not have zero-cost try-statements because the compiler generates C
     87code rather than assembler code \see{\VPageref{p:zero-cost}}. When the compiler
     88does create its own assembly (or LLVM byte-code), then zero-cost try-statements
     89are possible. The downside of zero-cost try-statements is the LSDA complexity,
     90its size (program bloat), and the high cost of raising an exception.
    5191
    52 \section{Zero-Cost Exceptions}
    53 \CFA does not have zero-cost exceptions because it does not generate assembly
    54 but instead generates C code. See the implementation section. When the
    55 compiler does start to create its own assembly (or LLVM byte code) then
    56 zero-cost exceptions could be implemented.
     92Alternatively, some research could be done into the simpler alternative method
     93with a non-zero-cost try-statement but much lower cost exception raise. For
     94example, programs are starting to use exceptions in the normal control path, so
     95more exceptions are thrown. In these cases, the cost balance switches towards
     96low-cost raise. Unfortunately, while exceptions remain exceptional, the
     97libunwind model will probably remain the most effective option.
    5798
    58 Now in zero-cost exceptions the only part that is zero-cost are the try
    59 blocks. Some research could be done into the alternative methods for systems
    60 that expect a lot more exceptions to be thrown, allowing some overhead in
    61 entering and leaving try blocks to make throws faster. But while exceptions
    62 remain exceptional the libunwind model will probably remain the most effective
    63 option.
     99Zero-cost resumption is still an open problem. First, because libunwind does
     100not support a successful-exiting stack-search without doing an unwind.
     101Workarounds are possible but awkward. Ideally an extension to libunwind could
     102be made, but that would either require separate maintenance or gain enough
     103support to have it folded into the standard.
    64104
    65 Zero-cost resumptions have more problems to solve. First because libunwind
    66 does not support a successful exiting stack search without doing an unwind.
    67 There are several ways to hack that functionality in. Ideally an extension to
    68 libunwind could be made, but that would either require seperate maintenance
    69 or gain enough support to have it folded into the standard.
    70 
    71 Also new techniques to skip previously searched parts of the stack will have
    72 to be developed. The recursive resume problem still remains and ideally the
    73 same pattern of ignoring sections of the stack.
     105Also new techniques to skip previously searched parts of the stack need to be
     106developed to handle the recursive resume problem and support advanced algebraic
     107effects.
    74108
    75109\section{Signal Exceptions}
    76 Exception Handling: Issues and a Proposed Notation suggests there are three
    77 types of exceptions: escape, notify and signal.
    78 Escape exceptions are our termination exceptions, notify exceptions are
    79 resumption exceptions and that leaves signal exception unimplemented.
     110Goodenough~\cite{Goodenough75} suggests three types of exceptions: escape,
     111notify and signal.  Escape are termination exceptions, notify are resumption
     112exceptions, leaving signal unimplemented.
    80113
    81 Signal exceptions allow either behaviour, that is after the exception is
    82 handled control can either return to the throw or from where the handler is
    83 defined.
     114A signal exception allows either behaviour, \ie after an exception is handled,
     115the handler has the option of returning to the raise or after the @try@
     116statement. Currently, \CFA fixes the semantics of the handler return
     117syntactically by the @catch@ or @catchResume@ clause.
    84118
    85 The design should be rexamined and be updated for \CFA. A very direct
    86 translation would perhaps have a new throw and catch pair and a statement
    87 (or statements) could be used to decide if the handler returns to the throw
    88 or continues where it is, but there are other options.
     119Signal exceptions should be reexamined and possibly supported in \CFA. A very
     120direct translation is to have a new raise and catch pair, and a new statement
     121(or statements) would indicate if the handler returns to the raise or continues
     122where it is; but there may be other options.
    89123
    90 For instance resumption could be extended to cover this use by allowing
    91 local control flow out of it. This would require an unwind as part of the
    92 transition as there are stack frames that have to be removed.
    93 This would mean there is no notify like throw but because \CFA does not have
    94 exception signatures a termination can be thrown from any resumption handler
    95 already so there are already ways one could try to do this in existing \CFA.
     124For instance, resumption could be extended to cover this use by allowing local
     125control flow out of it. This approach would require an unwind as part of the
     126transition as there are stack frames that have to be removed.  This approach
     127means there is no notify raise, but because \CFA does not have exception
     128signatures, a termination can be thrown from within any resumption handler so
     129there is already a way to mimic this in existing \CFA.
    96130
    97131% Maybe talk about the escape; and escape CONTROL_STMT; statements or how
    98132% if we could choose if _Unwind_Resume proceeded to the clean-up stage this
    99133% would be much easier to implement.
    100 
    101 \section{Language Improvements}
    102 There is also a lot of work that are not follow ups to this work in terms of
    103 research, some have no interesting research to be done at all, but would
    104 improve \CFA as a programming language. The full list of these would
    105 naturally be quite extensive but here are a few examples that involve
    106 exceptions:
    107 
    108 \begin{itemize}
    109 \item The implementation of termination is not portable because it includes
    110 some assembly statements. These sections will have to be re-written to so
    111 \CFA has full support on more machines.
    112 \item Allowing exception handler to bind the exception to a reference instead
    113 of a pointer. This should actually result in no change in behaviour so there
    114 is no reason not to allow it. It is however a small improvement; giving a bit
    115 of flexibility to the user in what style they want to use.
    116 \item Enabling local control flow (by @break@, @return@ and
    117 similar statements) out of a termination handler. The current set-up makes
    118 this very difficult but the catch function that runs the handler after it has
    119 been matched could be inlined into the function's body, which would make this
    120 much easier. (To do the same for try blocks would probably wait for zero-cost
    121 exceptions, which would allow the try block to be inlined as well.)
    122 \end{itemize}
  • doc/theses/andrew_beach_MMath/implement.tex

    rda3963a r565acf59  
    22% Goes over how all the features are implemented.
    33
     4The implementation work for this thesis covers two components: the virtual
     5system and exceptions. Each component is discussed in detail.
     6
    47\section{Virtual System}
     8\label{s:VirtualSystem}
    59% Virtual table rules. Virtual tables, the pointer to them and the cast.
    6 The \CFA virtual system only has one public facing feature: virtual casts.
    7 However there is a lot of structure to support that and provide some other
    8 features for the standard library.
    9 
    10 All of this is accessed through a field inserted at the beginning of every
    11 virtual type. Currently it is called @virtual_table@ but it is not
    12 ment to be accessed by the user. This field is a pointer to the type's
    13 virtual table instance. It is assigned once during the object's construction
    14 and left alone after that.
    15 
    16 \subsection{Virtual Table Construction}
    17 For each virtual type a virtual table is constructed. This is both a new type
    18 and an instance of that type. Other instances of the type could be created
    19 but the system doesn't use them. So this section will go over the creation of
    20 the type and the instance.
    21 
    22 Creating the single instance is actually very important. The address of the
    23 table acts as the unique identifier for the virtual type. Similarly the first
    24 field in every virtual table is the parent's id; a pointer to the parent
    25 virtual table instance.
    26 
    27 The remaining fields contain the type's virtual members. First come the ones
    28 present on the parent type, in the same order as they were the parent, and
    29 then any that this type introduces. The types of the ones inherited from the
    30 parent may have a slightly modified type, in that references to the
    31 dispatched type are replaced with the current virtual type. These are always
    32 taken by pointer or reference.
    33 
    34 The structure itself is created where the virtual type is created. The name
    35 of the type is created by mangling the name of the base type. The name of the
    36 instance is also generated by name mangling.
    37 
    38 The fields are initialized automatically.
     10While the \CFA virtual system currently has only one public feature, virtual
     11cast \see{\VPageref{p:VirtualCast}}, substantial structure is required to
     12support it, and provide features for exception handling and the standard
     13library.
     14
     15\subsection{Virtual Table}
     16The virtual system is accessed through a private constant field inserted at the
     17beginning of every virtual type, called the virtual-table pointer. This field
     18points at a type's virtual table and is assigned during the object's
     19construction.  The address of a virtual table acts as the unique identifier for
     20the virtual type, and the first field of a virtual table is a pointer to the
     21parent virtual-table or @0p@.  The remaining fields are duplicated from the
     22parent tables in this type's inheritance chain, followed by any fields this type
     23introduces. Parent fields are duplicated so they can be changed (\CC
     24\lstinline[language=c++]|override|), so that references to the dispatched type
     25are replaced with the current virtual type.
     26\PAB{Can you create a simple diagram of the layout?}
     27% These are always taken by pointer or reference.
     28
     29% For each virtual type, a virtual table is constructed. This is both a new type
     30% and an instance of that type. Other instances of the type could be created
     31% but the system doesn't use them. So this section will go over the creation of
     32% the type and the instance.
     33
     34A virtual table is created when the virtual type is created. The name of the
     35type is created by mangling the name of the base type. The name of the instance
     36is also generated by name mangling.  The fields are initialized automatically.
    3937The parent field is initialized by getting the type of the parent field and
    4038using that to calculate the mangled name of the parent's virtual table type.
    4139There are two special fields that are included like normal fields but have
    4240special initialization rules: the @size@ field is the type's size and is
    43 initialized with a sizeof expression, the @align@ field is the type's
    44 alignment and uses an alignof expression. The remaining fields are resolved
    45 to a name matching the field's name and type using the normal visibility
    46 and overload resolution rules of the type system.
    47 
    48 These operations are split up into several groups depending on where they
    49 take place which can vary for monomorphic and polymorphic types. The first
    50 devision is between the declarations and the definitions. Declarations, such
    51 as a function signature or a structure's name, must always be visible but may
    52 be repeated so they go in headers. Definitions, such as function bodies and a
    53 structure's layout, don't have to be visible on use but must occur exactly
    54 once and go into source files.
    55 
     41initialized with a @sizeof@ expression, the @align@ field is the type's
     42alignment and uses an @alignof@ expression. The remaining fields are resolved
     43to a name matching the field's name and type using the normal visibility and
     44overload resolution rules of the type system.
     45
     46These operations are split up into several groups depending on where they take
     47place which varies for monomorphic and polymorphic types. The first division is
     48between the declarations and the definitions. Declarations, such as a function
     49signature or an aggregate's name, must always be visible but may be repeated in
     50the form of forward declarations in headers. Definitions, such as function
     51bodies and an aggregate's layout, can be separately compiled but must occur
     52exactly once in a source file.
     53
     54\begin{sloppypar}
    5655The declarations include the virtual type definition and forward declarations
    5756of the virtual table instance, constructor, message function and
    58 @get_exception_vtable@. The definition includes the storage and
    59 initialization of the virtual table instance and the bodies of the three
    60 functions.
     57@get_exception_vtable@. The definition includes the storage and initialization
     58of the virtual table instance and the bodies of the three functions.
     59\end{sloppypar}
    6160
    6261Monomorphic instances put all of these two groups in one place each.
    63 
    64 Polymorphic instances also split out the core declarations and definitions
    65 from the per-instance information. The virtual table type and most of the
    66 functions are polymorphic so they are all part of the core. The virtual table
    67 instance and the @get_exception_vtable@ function.
    68 
     62Polymorphic instances also split out the core declarations and definitions from
     63the per-instance information. The virtual table type and most of the functions
     64are polymorphic so they are all part of the core. The virtual table instance
     65and the @get_exception_vtable@ function are the per-instance information.
     66
     67\begin{sloppypar}
    6968Coroutines and threads need instances of @CoroutineCancelled@ and
    70 @ThreadCancelled@ respectively to use all of their functionality.
    71 When a new data type is declared with @coroutine@ or @thread@
    72 the forward declaration for the instance is created as well. The definition
    73 of the virtual table is created at the definition of the main function.
     69@ThreadCancelled@ respectively to use all of their functionality.  When a new
     70data type is declared with @coroutine@ or @thread@ the forward declaration for
     71the instance is created as well. The definition of the virtual table is created
     72at the definition of the main function.
     73\end{sloppypar}
    7474
    7575\subsection{Virtual Cast}
    76 Virtual casts are implemented as a function call that does the check and a
    77 old C-style cast to do the type conversion. The C-cast is just to make sure
    78 the generated code is correct so the rest of the section is about that
    79 function.
    80 
    81 The function is @__cfa__virtual_cast@ and it is implemented in the
    82 standard library. It takes a pointer to the target type's virtual table and
    83 the object pointer being cast. The function is very simple, getting the
    84 object's virtual table pointer and then checking to see if it or any of
    85 its ancestors, by using the parent pointers, are the same as the target type
    86 virtual table pointer. It does this in a simple loop.
    87 
    88 For the generated code a forward decaration of the virtual works as follows.
    89 There is a forward declaration of @__cfa__virtual_cast@ in every cfa
    90 file so it can just be used. The object argument is the expression being cast
    91 so that is just placed in the argument list.
    92 
    93 To build the target type parameter the compiler will create a mapping from
    94 concrete type-name -- so for polymorphic types the parameters are filled in
    95 -- to virtual table address. Every virtual table declaraction is added to the
    96 this table; repeats are ignored unless they have conflicting definitions.
    97 This does mean the declaractions have to be in scope, but they should usually
    98 be introduced as part of the type definition.
     76Virtual casts are implemented as a function call that does the subtype check
     77and a C coercion-cast to do the type conversion.
     78% The C-cast is just to make sure the generated code is correct so the rest of
     79% the section is about that function.
     80The function is
     81\begin{cfa}
     82void * __cfa__virtual_cast( struct __cfa__parent_vtable const * parent,
     83        struct __cfa__parent_vtable const * const * child );
     84
     85\end{cfa}
     86and it is implemented in the standard library. It takes a pointer to the target
     87type's virtual table and the object pointer being cast. The function performs a
     88linear search starting at the object's virtual-table and walking through the
     89parent pointers, checking to see if it or any of its ancestors are the same as
     90the target-type virtual table-pointer.
     91
     92For the generated code, a forward declaration of the virtual works as follows.
     93There is a forward declaration of @__cfa__virtual_cast@ in every \CFA file so
     94it can just be used. The object argument is the expression being cast so that
     95is just placed in the argument list.
     96
     97To build the target type parameter, the compiler creates a mapping from
     98concrete type-name -- so for polymorphic types the parameters are filled in --
     99to virtual table address. Every virtual table declaration is added to this
     100table; repeats are ignored unless they have conflicting definitions.  Note,
     101these declarations do not have to be in scope, but they should usually be
     102introduced as part of the type definition.
     103
     104\PAB{I do not understand all of \VRef{s:VirtualSystem}. I think you need to
     105write more to make it clear.}
     106
    99107
    100108\section{Exceptions}
     
    106114% resumption doesn't as well.
    107115
    108 Many modern languages work with an interal stack that function push and pop
    109 their local data to. Stack unwinding removes large sections of the stack,
    110 often across functions.
    111 
    112 At a very basic level this can be done with @setjmp@ \& @longjmp@
    113 which simply move the top of the stack, discarding everything on the stack
    114 above a certain point. However this ignores all the clean-up code that should
    115 be run when certain sections of the stack are removed (for \CFA these are from
    116 destructors and finally clauses) and also requires that the point to which the
    117 stack is being unwound is known ahead of time. libunwind is used to address
    118 both of these problems.
    119 
    120 Libunwind, provided in @unwind.h@ on most platorms, is a C library
    121 that provides \CPP style stack unwinding. Its operation is divided into two
    122 phases. The search phase -- phase 1 -- is used to scan the stack and decide
    123 where the unwinding will stop, this allows for a dynamic target. The clean-up
    124 phase -- phase 2 -- does the actual unwinding and also runs any clean-up code
    125 as it goes.
    126 
    127 To use the libunwind each function must have a personality function and an
    128 LSDA (Language Specific Data Area). Libunwind actually does very little, it
    129 simply moves down the stack from function to function. Most of the actions are
    130 implemented by the personality function which libunwind calls on every
    131 function. Since this is shared across many functions or even every function in
    132 a language it will need a bit more information. This is provided by the LSDA
    133 which has the unique information for each function.
    134 
    135 Theoretically the LSDA can contain anything but conventionally it is a table
    136 with entries reperenting areas of the function and what has to be done there
    137 during unwinding. These areas are described in terms of where the instruction
    138 pointer is. If the current value of the instruction pointer is between two
    139 values reperenting the beginning and end of a region then execution is
    140 currently being executed. These are used to mark out try blocks and the
    141 scopes of objects with destructors to run.
    142 
    143 GCC will generate an LSDA and attach its personality function with the
    144 @-fexceptions@ flag. However this only handles the cleanup attribute.
    145 This attribute is used on a variable and specifies a function that should be
    146 run when the variable goes out of scope. The function is passed a pointer to
    147 the object as well so it can be used to mimic destructors. It however cannot
    148 be used to mimic try statements.
    149 
    150 \subsection{Implementing Personality Functions}
    151 Personality functions have a complex interface specified by libunwind.
    152 This section will cover some of the important parts of that interface.
    153 
    154 \begin{lstlisting}
    155 typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
    156     int version,
    157     _Unwind_Action action,
    158     _Unwind_Exception_Class exception_class,
    159     _Unwind_Exception * exception,
    160     struct _Unwind_Context * context);
     116% Many modern languages work with an internal stack that functions push and pop
     117% their local data to. Stack unwinding removes large sections of the stack,
     118% often across functions.
     119
     120Stack unwinding is the process of removing stack frames (activations) from the
     121stack. On function entry and return, unwinding is handled directly by the code
     122embedded in the function. Usually, the stack-frame size is known statically
     123based on parameter and local variable declarations.  For dynamically-sized
     124local variables, a runtime computation is necessary to know the frame
     125size. Finally, a function's frame-size may change during execution as local
     126variables (static or dynamic sized) go in and out of scope.
     127Allocating/deallocating stack space is usually an $O(1)$ operation achieved by
     128bumping the hardware stack-pointer up or down as needed.
     129
     130Unwinding across multiple stack frames is more complex because individual stack
     131management code associated with each frame is bypassed. That is, the location
     132of a function's frame-management code is largely unknown and dispersed
     133throughout the function, hence the current frame size managed by that code is
     134also unknown. Hence, code unwinding across frames does not have direct
     135knowledge about what is on the stack, and hence, how much of the stack needs to
     136be removed.
     137
     138% At a very basic level this can be done with @setjmp@ \& @longjmp@ which simply
     139% move the top of the stack, discarding everything on the stack above a certain
     140% point. However this ignores all the cleanup code that should be run when
     141% certain sections of the stack are removed (for \CFA these are from destructors
     142% and finally clauses) and also requires that the point to which the stack is
     143% being unwound is known ahead of time. libunwind is used to address both of
     144% these problems.
     145
     146The traditional unwinding mechanism for C is implemented by saving a snap-shot
     147of a function's state with @setjmp@ and restoring that snap-shot with
     148@longjmp@. This approach bypasses the need to know stack details by simply
     149resetting to a snap-shot of an arbitrary but existing function frame on the
     150stack. It is up to the programmer to ensure the snap-shot is valid when it is
     151reset, making this unwinding approach fragile with potential errors that are
     152difficult to debug because the stack becomes corrupted.
     153
     154However, many languages define cleanup actions that must be taken when objects
     155are deallocated from the stack or blocks end, such as running a variable's
     156destructor or a @try@ statement's @finally@ clause. Handling these mechanisms
     157requires walking the stack and checking each stack frame for these potential
     158actions.
     159
     160For exceptions, it must be possible to walk the stack frames in search of @try@
     161statements to match and execute a handler. For termination exceptions, it must
     162also be possible to unwind all stack frames from the throw to the matching
     163catch, and each of these frames must be checked for cleanup actions. Stack
     164walking is where most of the complexity and expense of exception handling
     165appears.
     166
     167One of the most popular tools for stack management is libunwind, a low-level
     168library that provides tools for stack walking, handler execution, and
     169unwinding. What follows is an overview of all the relevant features of
     170libunwind needed for this work, and how \CFA uses them to implement exception
     171handling.
     172
     173\subsection{libunwind Usage}
     174Libunwind, accessed through @unwind.h@ on most platforms, is a C library that
     175provides \CC-style stack-unwinding. Its operation is divided into two phases:
     176search and cleanup. The dynamic target search -- phase 1 -- is used to scan the
     177stack and decide where unwinding should stop (but no unwinding occurs). The
     178cleanup -- phase 2 -- does the unwinding and also runs any cleanup code.
     179
     180To use libunwind, each function must have a personality function and a Language
     181Specific Data Area (LSDA).  The LSDA has the unique information for each
     182function to tell the personality function where a function is executing, its
     183current stack frame, and what handlers should be checked.  Theoretically, the
     184LSDA can contain any information but conventionally it is a table with entries
     185representing regions of the function and what has to be done there during
     186unwinding. These regions are bracketed by the instruction pointer. If the
     187instruction pointer is within a region's start/end, then execution is currently
     188executing in that region. Regions are used to mark out the scopes of objects
     189with destructors and try blocks.
     190
     191% Libunwind actually does very little, it simply moves down the stack from
     192% function to function. Most of the actions are implemented by the personality
     193% function which libunwind calls on every function. Since this is shared across
     194% many functions or even every function in a language it will need a bit more
     195% information.
     196
     197The GCC compilation flag @-fexceptions@ causes the generation of an LSDA and
     198attaches its personality function. \PAB{to what is it attached?}  However, this
     199flag only handles the cleanup attribute
     200\begin{cfa}
     201void clean_up( int * var ) { ... }
     202int avar __attribute__(( cleanup(clean_up) ));
     203\end{cfa}
     204which is used on a variable and specifies a function, \eg @clean_up@, run when
     205the variable goes out of scope. The function is passed a pointer to the object
     206so it can be used to mimic destructors. However, this feature cannot be used to
     207mimic @try@ statements.
     208
     209\subsection{Personality Functions}
     210Personality functions have a complex interface specified by libunwind.  This
     211section covers some of the important parts of the interface.
     212
     213A personality function performs four tasks, although not all have to be
     214present.
     215\begin{lstlisting}[language=C,{moredelim=**[is][\color{red}]{@}{@}}]
     216typedef _Unwind_Reason_Code (*@_Unwind_Personality_Fn@) (
     217        _Unwind_Action @action@,
     218        _Unwind_Exception_Class @exception_class@,
     219        _Unwind_Exception * @exception@,
     220        struct _Unwind_Context * @context@
     221);
    161222\end{lstlisting}
    162 
    163 The return value, the reason code, is an enumeration of possible messages
     223The @action@ argument is a bitmask of possible actions:
     224\begin{enumerate}
     225\item
     226@_UA_SEARCH_PHASE@ specifies a search phase and tells the personality function
     227to check for handlers.  If there is a handler in a stack frame, as defined by
     228the language, the personality function returns @_URC_HANDLER_FOUND@; otherwise
     229it returns @_URC_CONTINUE_UNWIND@.
     230
     231\item
     232@_UA_CLEANUP_PHASE@ specifies a cleanup phase, where the entire frame is
     233unwound and all cleanup code is run. The personality function does whatever
     234cleanup the language defines (such as running destructors/finalizers) and then
     235generally returns @_URC_CONTINUE_UNWIND@.
     236
     237\item
     238\begin{sloppypar}
     239@_UA_HANDLER_FRAME@ specifies a cleanup phase on a function frame that found a
     240handler. The personality function must prepare to return to normal code
     241execution and return @_URC_INSTALL_CONTEXT@.
     242\end{sloppypar}
     243
     244\item
     245@_UA_FORCE_UNWIND@ specifies a forced unwind call. Forced unwind only performs
     246the cleanup phase and uses a different means to decide when to stop
     247\see{\VRef{s:ForcedUnwind}}.
     248\end{enumerate}
     249
     250The @exception_class@ argument is a copy of the
     251\lstinline[language=C]|exception|'s @exception_class@ field.
     252
     253The \lstinline[language=C]|exception| argument is a pointer to the user
     254provided storage object. It has two public fields, the exception class, which
     255is actually just a number, identifying the exception handling mechanism that
     256created it, and the cleanup function. The cleanup function is called if
     257required by the exception.
     258
     259The @context@ argument is a pointer to an opaque type passed to helper
     260functions called inside the personality function.
     261
     262The return value, @_Unwind_Reason_Code@, is an enumeration of possible messages
    164263that can be passed several places in libunwind. It includes a number of
    165264messages for special cases (some of which should never be used by the
     
    167266personality function should always return @_URC_CONTINUE_UNWIND@.
    168267
    169 The @version@ argument is the verson of the implementation that is
    170 calling the personality function. At this point it appears to always be 1 and
    171 it will likely stay that way until a new version of the API is updated.
    172 
    173 The @action@ argument is set of flags that tell the personality
    174 function when it is being called and what it must do on this invocation.
    175 The flags are as follows:
    176 \begin{itemize}
    177 \item@_UA_SEARCH_PHASE@: This flag is set whenever the personality
    178 function is called during the search phase. The personality function should
    179 decide if unwinding will stop in this function or not. If it does then the
    180 personality function should return @_URC_HANDLER_FOUND@.
    181 \item@_UA_CLEANUP_PHASE@: This flag is set whenever the personality
    182 function is called during the cleanup phase. If no other flags are set this
    183 means the entire frame will be unwound and all cleanup code should be run.
    184 \item@_UA_HANDLER_FRAME@: This flag is set during the cleanup phase
    185 on the function frame that found the handler. The personality function must
    186 prepare to return to normal code execution and return
    187 @_URC_INSTALL_CONTEXT@.
    188 \item@_UA_FORCE_UNWIND@: This flag is set if the personality function
    189 is called through a forced unwind call. Forced unwind only performs the
    190 cleanup phase and uses a different means to decide when to stop. See its
    191 section below.
    192 \end{itemize}
    193 
    194 The @exception_class@ argument is a copy of the @exception@'s
    195 @exception_class@ field.
    196 
    197 The @exception@ argument is a pointer to the user provided storage
    198 object. It has two public fields, the exception class which is actually just
    199 a number that identifies the exception handling mechanism that created it and
    200 the other is the clean-up function. The clean-up function is called if the
    201 exception needs to
    202 
    203 The @context@ argument is a pointer to an opaque type. This is passed
    204 to the many helper functions that can be called inside the personality
    205 function.
    206 
    207268\subsection{Raise Exception}
    208 This could be considered the central function of libunwind. It preforms the
    209 two staged unwinding the library is built around and most of the rest of the
    210 interface of libunwind is here to support it. It's signature is as follows:
    211 
    212 \begin{lstlisting}
     269Raising an exception is the central function of libunwind and it performs a
     270two-staged unwinding.
     271\begin{cfa}
    213272_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
     273\end{cfa}
     274First, the function begins the search phase, calling the personality function
     275of the most recent stack frame. It continues to call personality functions
     276traversing the stack from newest to oldest until a function finds a handler or
     277the end of the stack is reached. In the latter case, raise exception returns
     278@_URC_END_OF_STACK@.
     279
     280Second, when a handler is matched, raise exception continues onto the cleanup
     281phase.
     282Once again, it calls the personality functions of each stack frame from newest
     283to oldest. This pass stops at the stack frame containing the matching handler.
     284If that personality function has not installed a handler, it is an error.
     285
     286If an error is encountered, raise exception returns either
     287@_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending on when the
     288error occurred.
     289
     290\subsection{Forced Unwind}
     291\label{s:ForcedUnwind}
     292Forced Unwind is the other central function in libunwind.
     293\begin{cfa}
     294_Unwind_Reason_Code _Unwind_ForcedUnwind( _Unwind_Exception *,
     295        _Unwind_Stop_Fn, void *);
     296\end{cfa}
     297It also unwinds the stack but it does not use the search phase. Instead another
     298function, the stop function, is used to decide when to stop unwinding.  The exception is the
     299same as the one passed to raise exception. The extra arguments are the stop
     300function and the stop parameter. The stop function has a similar interface to a
     301personality function, except it is also passed the stop parameter.
     302\begin{lstlisting}[language=C,{moredelim=**[is][\color{red}]{@}{@}}]
     303typedef _Unwind_Reason_Code (*@_Unwind_Stop_Fn@)(
     304        _Unwind_Action @action@,
     305        _Unwind_Exception_Class @exception_class@,
     306        _Unwind_Exception * @exception@,
     307        struct _Unwind_Context * @context@,
     308        void * @stop_parameter@);
    214309\end{lstlisting}
    215310
    216 When called the function begins the search phase, calling the personality
    217 function of the most recent stack frame. It will continue to call personality
    218 functions traversing the stack new-to-old until a function finds a handler or
    219 the end of the stack is reached. In the latter case raise exception will
    220 return with @_URC_END_OF_STACK@.
    221 
    222 Once a handler has been found raise exception continues onto the the cleanup
    223 phase. Once again it will call the personality functins of each stack frame
    224 from newest to oldest. This pass will stop at the stack frame that found the
    225 handler last time, if that personality function does not install the handler
    226 it is an error.
    227 
    228 If an error is encountered raise exception will return either
    229 @_URC_FATAL_PHASE1_ERROR@ or @_URC_FATAL_PHASE2_ERROR@ depending
    230 on when the error occured.
    231 
    232 \subsection{Forced Unwind}
    233 This is the second big function in libunwind. It also unwinds a stack but it
    234 does not use the search phase. Instead another function, the stop function,
    235 is used to decide when to stop.
    236 
    237 \begin{lstlisting}
    238 _Unwind_Reason_Code _Unwind_ForcedUnwind(
    239     _Unwind_Exception *, _Unwind_Stop_Fn, void *);
    240 \end{lstlisting}
    241 
    242 The exception is the same as the one passed to raise exception. The extra
    243 arguments are the stop function and the stop parameter. The stop function has
    244 a similar interface as a personality function, except it is also passed the
    245 stop parameter.
    246 
    247 \begin{lstlisting}
    248 typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(
    249     int version,
    250     _Unwind_Action action,
    251     _Unwind_Exception_Class exception_class,
    252     _Unwind_Exception * exception,
    253     struct _Unwind_Context * context,
    254     void * stop_parameter);
    255 \end{lstlisting}
    256 
    257311The stop function is called at every stack frame before the personality
    258 function is called and then once more once after all frames of the stack have
    259 been unwound.
    260 
    261 Each time it is called the stop function should return @_URC_NO_REASON@
    262 or transfer control directly to other code outside of libunwind. The
    263 framework does not provide any assistance here.
    264 
    265 Its arguments are the same as the paired personality function.
    266 The actions @_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always
    267 set when it is called. By the official standard that is all but both GCC and
    268 Clang add an extra action on the last call at the end of the stack:
    269 @_UA_END_OF_STACK@.
     312function is called and then once more after all frames of the stack are
     313unwound.
     314
     315Each time it is called, the stop function should return @_URC_NO_REASON@ or
     316transfer control directly to other code outside of libunwind. The framework
     317does not provide any assistance here.
     318
     319\begin{sloppypar}
     320Its arguments are the same as the paired personality function.  The actions
     321@_UA_CLEANUP_PHASE@ and @_UA_FORCE_UNWIND@ are always set when it is
     322called. Beyond the libunwind standard, both GCC and Clang add an extra action
     323on the last call at the end of the stack: @_UA_END_OF_STACK@.
     324\end{sloppypar}
    270325
    271326\section{Exception Context}
    272327% Should I have another independent section?
    273328% There are only two things in it, top_resume and current_exception. How it is
    274 % stored changes depending on wheither or not the thread-library is linked.
    275 
    276 The exception context is a piece of global storage used to maintain data
    277 across different exception operations and to communicate between different
    278 components.
    279 
    280 Each stack has its own exception context. In a purely sequental program, using
    281 only core Cforall, there is only one stack and the context is global. However
    282 if the library @libcfathread@ is linked then there can be multiple
    283 stacks so they will each need their own.
    284 
    285 To handle this code always gets the exception context from the function
    286 @this_exception_context@. The main exception handling code is in
    287 @libcfa@ and that library also defines the function as a weak symbol
    288 so it acts as a default. Meanwhile in @libcfathread@ the function is
    289 defined as a strong symbol that replaces it when the libraries are linked
    290 together.
    291 
    292 The version of the function defined in @libcfa@ is very simple. It
    293 returns a pointer to a global static variable. With only one stack this
    294 global instance is associated with the only stack.
    295 
    296 The version of the function defined in @libcfathread@ has to handle
    297 more as there are multiple stacks. The exception context is included as
    298 part of the per-stack data stored as part of coroutines. In the cold data
    299 section, stored at the base of each stack, is the exception context for that
    300 stack. The @this_exception_context@ uses the concurrency library to get
    301 the current coroutine and through it the cold data section and the exception
    302 context.
     329% stored changes depending on whether or not the thread-library is linked.
     330
     331The exception context is global storage used to maintain data across different
     332exception operations and to communicate among different components.
     333
     334Each stack must have its own exception context. In a sequential \CFA program,
     335there is only one stack with a single global exception-context. However, when
     336the library @libcfathread@ is linked, there are multiple stacks where each
     337needs its own exception context.
     338
     339General access to the exception context is provided by function
     340@this_exception_context@. For sequential execution, this function is defined as
     341a weak symbol in the \CFA system-library, @libcfa@. When a \CFA program is
     342concurrent, it links with @libcfathread@, where this function is defined with a
     343strong symbol replacing the sequential version.
     344
     345% The version of the function defined in @libcfa@ is very simple. It returns a
     346% pointer to a global static variable. With only one stack this global instance
     347% is associated with the only stack.
     348
     349For coroutines, @this_exception_context@ accesses the exception context stored
     350at the base of the stack. For threads, @this_exception_context@ uses the
     351concurrency library to access the current stack of the thread or coroutine
     352being executed by the thread, and then accesses the exception context stored at
     353the base of this stack.
    303354
    304355\section{Termination}
     
    306357% catches. Talk about GCC nested functions.
    307358
    308 Termination exceptions use libunwind quite heavily because it matches the
    309 intended use from \CPP exceptions very closely. The main complication is that
    310 since the \CFA compiler works by translating to C code it cannot generate the
    311 assembly to form the LSDA for try blocks or destructors.
     359Termination exceptions use libunwind heavily because it matches the intended
     360use from \CC exceptions closely. The main complication for \CFA is that the
     361compiler generates C code, making it very difficult to generate the assembly to
     362form the LSDA for try blocks or destructors.
    312363
    313364\subsection{Memory Management}
    314 The first step of termination is to copy the exception into memory managed by
    315 the exception system. Currently the system just uses malloc, without reserved
    316 memory or and ``small allocation" optimizations. The exception handling
    317 mechanism manages memory for the exception as well as memory for libunwind
    318 and the system's own per-exception storage.
    319 
    320 Exceptions are stored in variable sized block. The first component is a fixed
    321 sized data structure that contains the information for libunwind and the
    322 exception system. The second component is a blob of memory that is big enough
    323 to store the exception. Macros with pointer arthritic and type cast are
    324 used to move between the components or go from the embedded
     365The first step of a termination raise is to copy the exception into memory
     366managed by the exception system. Currently, the system uses @malloc@, rather
     367than reserved memory or the stack top. The exception handling mechanism manages
     368memory for the exception as well as memory for libunwind and the system's own
     369per-exception storage.
     370
     371Exceptions are stored in variable-sized blocks. \PAB{Show a memory layout
     372figure.} The first component is a fixed sized data structure that contains the
     373information for libunwind and the exception system. The second component is an
     374area of memory big enough to store the exception. Macros with pointer arithmetic
     375and type casts are used to move between the components or go from the embedded
    325376@_Unwind_Exception@ to the entire node.
    326377
    327 All of these nodes are strung together in a linked list. One linked list per
    328 stack, with the head stored in the exception context. Within each linked list
    329 the most recently thrown exception is at the head and the older exceptions
    330 are further down the list. This list format allows exceptions to be thrown
    331 while a different exception is being handled. Only the exception at the head
    332 of the list is currently being handled, the other will wait for the
    333 exceptions before them to be removed.
    334 
    335 The virtual members in the exception's virtual table. The size of the
    336 exception, the copy function and the free function are all in the virtual
    337 table so they are decided per-exception type. The size and copy function are
    338 used right away when the exception is copied in to managed memory. After the
    339 exception is handled the free function is used to clean up the exception and
    340 then the entire node is passed to free.
    341 
    342 \subsection{Try Statements \& Catch Clauses}
    343 The try statements with termination handlers have a pretty complex conversion
    344 to compensate for the lack of assembly generation. Libunwind requires an LSDA
    345 (Language Specific Data Area) and personality function for a function to
    346 unwind across it. The LSDA in particular is hard to generate at the level of
    347 C which is what the \CFA compiler outputs so a work-around is used.
    348 
    349 This work around is a function called @__cfaehm_try_terminate@ in the
    350 standard library. The contents of a try block and the termination handlers
    351 are converted into functions. These are then passed to the try terminate
    352 function and it calls them. This puts the try statements in their own
    353 functions so that no function has to deal with both termination handlers and
    354 destructors.
    355 
    356 This function has some custom embedded assembly that defines its personality
    357 function and LSDA. This is hand coded in C which is why there is only one
    358 version of it, the compiler has no capability to generate it. The personality
    359 function is structured so that it may be expanded, but really it only handles
    360 this one function. Notably it does not handle any destructors so the function
    361 is constructed so that it does need to run it.
     378All of these nodes are linked together in a list, one list per stack, with the
     379list head stored in the exception context. Within each linked list, the most
     380recently thrown exception is at the head followed by older thrown
     381exceptions. This format allows exceptions to be thrown, while a different
     382exception is being handled. The exception at the head of the list is currently
     383being handled, while other exceptions wait for the exceptions before them to be
     384removed.
     385
     386The virtual members in the exception's virtual table provide the size of the
     387exception, the copy function, and the free function, so they are specific to an
     388exception type. The size and copy function are used immediately to copy an
     389exception into managed memory. After the exception is handled the free function
     390is used to clean up the exception and then the entire node is passed to free.
     391
     392\subsection{Try Statements and Catch Clauses}
     393The try statement with termination handlers is complex because it must
     394compensate for the lack of assembly-code generated from \CFA. Libunwind
     395requires an LSDA and personality function for control to unwind across a
     396function. The LSDA in particular is hard to mimic in generated C code.
     397
     398The workaround is a function called @__cfaehm_try_terminate@ in the standard
     399library. The contents of a try block and the termination handlers are converted
     400into functions. These are then passed to the try terminate function and it
     401calls them. This approach puts a try statement in its own functions so that no
     402function has to deal with both termination handlers and destructors. \PAB{I do
     403not understand the previous sentence.}
     404
     405This function has some custom embedded assembly that defines \emph{its}
     406personality function and LSDA. The assembly is created with handcrafted C @asm@
     407statements, which is why there is only one version of it. The personality
     408function is structured so that it can be expanded, but currently it only
     409handles this one function.  Notably, it does not handle any destructors so the
     410function is constructed so that it does not need to run any. \PAB{I do not
     411understand the previous sentence.}
    362412
    363413The three functions passed to try terminate are:
    364 \begin{itemize}
    365 \item The try function: This function is the try block, all the code inside
    366 the try block is placed inside the try function. It takes no parameters and
    367 has no return value. This function is called during regular execution to run
    368 the try block.
    369 \item The match function: This function decides if this try statement should
    370 handle any given termination exception. It takes a pointer to the exception
    371 and returns 0 if the exception is not handled here. Otherwise the return value
    372 is the id of the handler that should handle the exception. It is called
    373 during the search phase.
    374 It is constructed from the conditional part of each handler. It runs each
    375 check in turn, first checking to see if the object
    376 \item The catch function: This function handles the exception. It takes a
    377 pointer to the exception and the handler's id and returns nothing. It is
    378 called after the clean-up phase.
    379 It is constructed by stitching together the bodies of each handler
    380 \end{itemize}
    381 All three are created with GCC nested functions. GCC nested functions can be
    382 used to create closures, functions that can refer to the state of other
    383 functions on the stack. This allows the functions to refer to the main
    384 function and all the variables in scope.
    385 
    386 These nested functions and all other functions besides
    387 @__cfaehm_try_terminate@ in \CFA use the GCC personality function and
    388 the @-fexceptions@ flag to generate the LSDA. This allows destructors
    389 to be implemented with the cleanup attribute.
     414\begin{description}
     415\item[try function:] This function is the try block, all the code inside the
     416try block is placed inside the try function. It takes no parameters and has no
     417return value. This function is called during regular execution to run the try
     418block.
     419
     420\item[match function:] This function is called during the search phase and
     421decides if a catch clause matches the termination exception.  It is constructed
     422from the conditional part of each handler and runs each check, top to bottom,
     423in turn, first checking to see if the exception type matches and then if the
     424condition is true. It takes a pointer to the exception and returns 0 if the
     425exception is not handled here. Otherwise the return value is the id of the
     426handler that matches the exception.
     427
     428\item[handler function:] This function handles the exception. It takes a
     429pointer to the exception and the handler's id and returns nothing. It is called
     430after the cleanup phase.  It is constructed by stitching together the bodies of
     431each handler and dispatches to the selected handler.
     432\end{description}
     433All three functions are created with GCC nested functions. GCC nested functions
     434can be used to create closures, functions that can refer to the state of other
     435functions on the stack. This approach allows the functions to refer to all the
     436variables in scope for the function containing the @try@ statement.  These
     437nested functions and all other functions besides @__cfaehm_try_terminate@ in
     438\CFA use the GCC personality function and the @-fexceptions@ flag to generate
     439the LSDA. This allows destructors to be implemented with the cleanup attribute.
    390440
    391441\section{Resumption}
    392442% The stack-local data, the linked list of nodes.
    393443
    394 Resumption uses a list of nodes for its stack traversal. The head of the list
    395 is stored in the exception context. The nodes in the list just have a pointer
     444Resumption is simple to implement because there is no stack unwinding. The
     445resumption raise uses a list of nodes for its stack traversal. The head of the
     446list is stored in the exception context. The nodes in the list have a pointer
    396447to the next node and a pointer to the handler function.
    397448
    398 The on a resumption throw the this list is traversed. At each node the
    399 handler function is called and is passed the exception by pointer. It returns
    400 true if the exception was handled and false otherwise.
    401 
    402 The handler function does both the matching and catching. It tries each
    403 the condition of @catchResume@ in order, top-to-bottom and until it
    404 finds a handler that matches. If no handler matches then the function returns
    405 false. Otherwise the matching handler is run, if it completes successfully
    406 the function returns true. Rethrows, through the @throwResume;@
    407 statement, cause the function to return true.
     449A resumption raise traverses this list. At each node the handler function is
     450called, passing the exception by pointer. It returns true if the exception is
     451handled and false otherwise.
     452
     453The handler function does both the matching and handling. It computes the
     454condition of each @catchResume@ in top-to-bottom order, until it finds a
     455handler that matches. If no handler matches then the function returns
     456false. Otherwise the matching handler is run; if it completes successfully, the
     457function returns true. Reresume, through the @throwResume;@ statement, causes
     458the function to return true.
    408459
    409460% Recursive Resumption Stuff:
    410 Blocking out part of the stack is accomplished by updating the front of the
    411 list as the search continues. Before the handler at a node is called the head
    412 of the list is updated to the next node of the current node. After the search
    413 is complete, successful or not, the head of the list is reset.
    414 
    415 This means the current handler and every handler that has already been
    416 checked are not on the list while a handler is run. If a resumption is thrown
    417 during the handling of another resumption the active handlers and all the
    418 other handler checked up to this point will not be checked again.
     461Search skipping \see{\VPageref{p:searchskip}}, which ignores parts of the stack
     462already examined, is accomplished by updating the front of the list as the
     463search continues. Before the handler at a node is called the head of the list
     464is updated to the next node of the current node. After the search is complete,
     465successful or not, the head of the list is reset.
     466
     467This mechanism means the current handler and every handler that has already
     468been checked are not on the list while a handler is run. If a resumption is
     469thrown during the handling of another resumption the active handlers and all
     470the other handlers checked up to this point are not checked again.
    419471
    420472This structure also supports new handlers added while the resumption is being
    421473handled. These are added to the front of the list, pointing back along the
    422 stack -- the first one will point over all the checked handlers -- and the
    423 ordering is maintained.
    424 
    425 \subsection{Libunwind Compatibility}
    426 Resumption does not use libunwind for two simple reasons. The first is that
    427 it does not have to unwind anything so would never need to use the clean-up
    428 phase. Still the search phase could be used to make it free to enter or exit
    429 a try statement with resumption handlers in the same way termination handlers
    430 are for the same trade off in the cost of the throw. This is where the second
    431 reason comes in, there is no way to return from a search without installing
    432 a handler or raising an error.
    433 
    434 Although work arounds could be created none seemed to be worth it for the
    435 prototype. This implementation has no difference in behaviour and is much
    436 simpler.
     474stack -- the first one points over all the checked handlers -- and the ordering
     475is maintained.
     476
     477\label{p:zero-cost}
     478Note, the resumption implementation has a cost for entering/exiting a @try@
     479statement with @catchResume@ clauses, whereas a @try@ statement with @catch@
     480clauses has zero-cost entry/exit. While resumption does not need the stack
     481unwinding and cleanup provided by libunwind, it could use the search phase to
     482provide zero-cost entry/exit using the LSDA. Unfortunately, there is no way
     483to return from a libunwind search without installing a handler or raising an
     484error.  Although workarounds might be possible, they are beyond the scope of
     485this thesis. The current resumption implementation has simplicity in its
     486favour.
    437487% Seriously, just compare the size of the two chapters and then consider
    438488% that unwind is required knowledge for that chapter.
     
    440490\section{Finally}
    441491% Uses destructors and GCC nested functions.
    442 Finally clauses are a simple decomposition to some of the existing features.
    443 The code in the block is placed into a GCC nested function with a unique name,
    444 no arguments or return values. This nested function is then set as the
    445 clean-up function of an empty object that is declared at the beginning of a
    446 block placed around the contexts of the try statement.
     492A finally clause is placed into a GCC nested-function with a unique name, and no
     493arguments or return values. This nested function is then set as the cleanup
     494function of an empty object that is declared at the beginning of a block placed
     495around the context of the associated @try@ statement.
    447496
    448497The rest is handled by GCC. The try block and all handlers are inside the
    449 block. When they are complete control exits the block and the empty object
    450 is cleaned up, which runs the function that contains the finally code.
     498block. At completion, control exits the block and the empty object is cleaned
     499up, which runs the function that contains the finally code.
    451500
    452501\section{Cancellation}
     
    454503
    455504Cancellation also uses libunwind to do its stack traversal and unwinding,
    456 however it uses a different primary function @_Unwind_ForcedUnwind@.
    457 Details of its interface can be found in the unwind section.
    458 
    459 The first step of cancellation is to find the stack was cancelled and which
    460 type of stack it is. Luckily the threads library stores the main thread
    461 pointer and the current thread pointer and every thread stores a pointer to
     505however it uses a different primary function @_Unwind_ForcedUnwind@.  Details
     506of its interface can be found in \VRef{s:ForcedUnwind}.
     507
     508The first step of cancellation is to find the cancelled stack and its type:
     509coroutine or thread. Fortunately, the thread library stores the main thread
     510pointer and the current thread pointer, and every thread stores a pointer to
    462511its main coroutine and the coroutine it is currently executing.
    463512
    464 So if the the current thread's main and current coroutine do not match, it is
    465 a coroutine cancellation. Otherwise if the main and current thread do not
    466 match, it is a thread cancellation. Otherwise it is a main thread
    467 cancellation.
    468 
    469 However if the threading library is not linked then execution must be on the
    470 main stack as that is the only one that exists. So the entire check is skipped
    471 using the linker and weak symbols. Instead the main thread cancellation is
    472 unconditionally preformed.
    473 
    474 Regardless of how they are choosen afterwords the stop function and the stop
    475 parameter are passed to the forced unwind functon. The general pattern of all
    476 three stop functions is the same, they continue unwinding until the end of
    477 stack when they do there primary work.
    478 
    479 Main stack cancellation it is very simple. The ``transfer" is just an abort,
    480 the program stops executing.
    481 
    482 The coroutine cancellation stores the exception on the coroutine and then
    483 does a coroutine context switch. The rest is handled inside resume. Every time
    484 control returns from a resumed thread there is a check to see if it is
    485 cancelled. If it is the exception is retrieved and the CoroutineCancelled
    486 exception is constructed and loaded. It is then thrown as a regular exception
    487 with the default handler coming from the context of the resumption call.
    488 
    489 The thread cancellation stores the exception on the thread's main stack and
    490 then returns to the scheduler. The rest is handled by the joiner. The wait
    491 for the joined thread to finish works the same but after that it checks
    492 to see if there was a cancellation. If there was the exception is retrieved
    493 and the ThreadCancelled exception is constructed. The default handler is
    494 passed in as a function pointer. If it is null (as it is for the
    495 auto-generated joins on destructor call) it a default is used that simply
    496 calls abort; which gives the required handling on implicate join.
     513The first check is if the current thread's main and current coroutine do not
     514match, implying a coroutine cancellation; otherwise, if the main and current
     515thread do not match, it is a thread cancellation. Otherwise it is a main thread cancellation. \PAB{Previous
     516sentence does not make sense.}
     517
     518However, if the threading library is not linked, the sequential execution is on
     519the main stack. Hence, the entire check is skipped because the weak-symbol
     520function is loaded. Therefore, a main thread cancellation is unconditionally
     521performed.
     522
     523Regardless of how the stack is chosen, the stop function and parameter are
     524passed to the forced-unwind function. The general pattern of all three stop
     525functions is the same: they continue unwinding until the end of stack when they
     526do their primary work.
     527
     528For main stack cancellation, the transfer is just a program abort.
     529
     530For coroutine cancellation, the exception is stored on the coroutine's stack,
     531and the coroutine context switches to its last resumer. The rest is handled on
     532the backside of the resume, which checks if the resumed coroutine is
     533cancelled. If cancelled, the exception is retrieved from the resumed coroutine,
     534and a @CoroutineCancelled@ exception is constructed and loaded with the
     535cancelled exception. It is then resumed as a regular exception with the default
     536handler coming from the context of the resumption call.
     537
     538For thread cancellation, the exception is stored on the thread's main stack and
     539then context switched to the scheduler. The rest is handled by the thread
     540joiner. When the join is complete, the joiner checks if the joined thread is
     541cancelled. If cancelled, the exception is retrieved from the joined thread, and
     542a @ThreadCancelled@ exception is constructed and loaded with the cancelled
     543exception. The default handler is passed in as a function pointer. If it is
     544null (as it is for the auto-generated joins on destructor call), the default is
     545used, which is a program abort.
     546%; which gives the required handling on implicit join.
  • doc/theses/andrew_beach_MMath/thesis-frontpgs.tex

    rda3963a r565acf59  
    3636
    3737        A thesis \\
    38         presented to the University of Waterloo \\ 
     38        presented to the University of Waterloo \\
    3939        in fulfillment of the \\
    4040        thesis requirement for the degree of \\
     
    6464\cleardoublepage
    6565
    66  
     66
    6767%----------------------------------------------------------------------
    6868% EXAMINING COMMITTEE (Required for Ph.D. theses only)
     
    7171\begin{center}\textbf{Examining Committee Membership}\end{center}
    7272  \noindent
    73 The following served on the Examining Committee for this thesis. The decision of the Examining Committee is by majority vote.
    74   \bigskip
    75  
    76   \noindent
    77 \begin{tabbing}
    78 Internal-External Member: \=  \kill % using longest text to define tab length
    79 External Examiner: \>  Bruce Bruce \\
     73The following served on the Examining Committee for this thesis. The decision
     74of the Examining Committee is by majority vote.
     75  \bigskip
     76
     77  \noindent
     78\begin{tabbing}
     79Internal-External Member: \=  \kill % using longest text to define tab length
     80External Examiner: \>  Bruce Bruce \\
    8081\> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
    81 \end{tabbing} 
    82   \bigskip
    83  
     82\end{tabbing}
     83  \bigskip
     84
    8485  \noindent
    8586\begin{tabbing}
     
    9192\end{tabbing}
    9293  \bigskip
    93  
     94
    9495  \noindent
    9596  \begin{tabbing}
     
    99100\end{tabbing}
    100101  \bigskip
    101  
     102
    102103  \noindent
    103104\begin{tabbing}
     
    107108\end{tabbing}
    108109  \bigskip
    109  
     110
    110111  \noindent
    111112\begin{tabbing}
     
    123124  % December 13th, 2006.  It is designed for an electronic thesis.
    124125  \noindent
    125 I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
    126 
    127   \bigskip
    128  
     126I hereby declare that I am the sole author of this thesis. This is a true copy
     127of the thesis, including any required final revisions, as accepted by my
     128examiners.
     129
     130  \bigskip
     131
    129132  \noindent
    130133I understand that my thesis may be made electronically available to the public.
  • doc/theses/andrew_beach_MMath/thesis.tex

    rda3963a r565acf59  
    4545% FRONT MATERIAL
    4646%----------------------------------------------------------------------
    47 \input{thesis-frontpgs} 
     47\input{thesis-frontpgs}
    4848
    4949%----------------------------------------------------------------------
     
    6565A \gls{computer} could compute $\pi$ all day long. In fact, subsets of digits
    6666of $\pi$'s decimal approximation would make a good source for pseudo-random
    67 vectors, \gls{rvec} . 
     67vectors, \gls{rvec} .
    6868
    6969%----------------------------------------------------------------------
     
    9696
    9797\begin{itemize}
    98 \item A well-prepared PDF should be 
     98\item A well-prepared PDF should be
    9999  \begin{enumerate}
    100100    \item Of reasonable size, {\it i.e.} photos cropped and compressed.
    101     \item Scalable, to allow enlargement of text and drawings. 
    102   \end{enumerate} 
     101    \item Scalable, to allow enlargement of text and drawings.
     102  \end{enumerate}
    103103\item Photos must be bit maps, and so are not scalable by definition. TIFF and
    104104BMP are uncompressed formats, while JPEG is compressed. Most photos can be
    105105compressed without losing their illustrative value.
    106 \item Drawings that you make should be scalable vector graphics, \emph{not} 
     106\item Drawings that you make should be scalable vector graphics, \emph{not}
    107107bit maps. Some scalable vector file formats are: EPS, SVG, PNG, WMF. These can
    108 all be converted into PNG or PDF, that pdflatex recognizes. Your drawing 
    109 package probably can export to one of these formats directly. Otherwise, a 
    110 common procedure is to print-to-file through a Postscript printer driver to 
    111 create a PS file, then convert that to EPS (encapsulated PS, which has a 
    112 bounding box to describe its exact size rather than a whole page). 
     108all be converted into PNG or PDF, that pdflatex recognizes. Your drawing
     109package probably can export to one of these formats directly. Otherwise, a
     110common procedure is to print-to-file through a Postscript printer driver to
     111create a PS file, then convert that to EPS (encapsulated PS, which has a
     112bounding box to describe its exact size rather than a whole page).
    113113Programs such as GSView (a Ghostscript GUI) can create both EPS and PDF from
    114114PS files. Appendix~\ref{AppendixA} shows how to generate properly sized Matlab
    115115plots and save them as PDF.
    116116\item It's important to crop your photos and draw your figures to the size that
    117 you want to appear in your thesis. Scaling photos with the 
    118 includegraphics command will cause loss of resolution. And scaling down 
     117you want to appear in your thesis. Scaling photos with the
     118includegraphics command will cause loss of resolution. And scaling down
    119119drawings may cause any text annotations to become too small.
    120120\end{itemize}
    121  
     121
    122122For more information on \LaTeX\, see the uWaterloo Skills for the
    123 Academic Workplace \href{https://uwaterloo.ca/information-systems-technology/services/electronic-thesis-preparation-and-submission-support/ethesis-guide/creating-pdf-version-your-thesis/creating-pdf-files-using-latex/latex-ethesis-and-large-documents}{course notes}. 
     123Academic Workplace \href{https://uwaterloo.ca/information-systems-technology/services/electronic-thesis-preparation-and-submission-support/ethesis-guide/creating-pdf-version-your-thesis/creating-pdf-files-using-latex/latex-ethesis-and-large-documents}{course notes}.
    124124\footnote{
    125125Note that while it is possible to include hyperlinks to external documents,
    126 it is not wise to do so, since anything you can't control may change over time. 
    127 It \emph{would} be appropriate and necessary to provide external links to 
    128 additional resources for a multimedia ``enhanced'' thesis. 
    129 But also note that if the \package{hyperref} package is not included, 
    130 as for the print-optimized option in this thesis template, any \cmmd{href} 
     126it is not wise to do so, since anything you can't control may change over time.
     127It \emph{would} be appropriate and necessary to provide external links to
     128additional resources for a multimedia ``enhanced'' thesis.
     129But also note that if the \package{hyperref} package is not included,
     130as for the print-optimized option in this thesis template, any \cmmd{href}
    131131commands in your logical document are no longer defined.
    132132A work-around employed by this thesis template is to define a dummy
    133 \cmmd{href} command (which does nothing) in the preamble of the document, 
    134 before the \package{hyperref} package is included. 
     133\cmmd{href} command (which does nothing) in the preamble of the document,
     134before the \package{hyperref} package is included.
    135135The dummy definition is then redefined by the
    136136\package{hyperref} package when it is included.
     
    138138
    139139The classic book by Leslie Lamport \cite{lamport.book}, author of \LaTeX , is
    140 worth a look too, and the many available add-on packages are described by 
     140worth a look too, and the many available add-on packages are described by
    141141Goossens \textit{et al} \cite{goossens.book}.
    142142
     
    180180Export Setup button in the figure Property Editor.
    181181
    182 \section{From the Command Line} 
     182\section{From the Command Line}
    183183All figure properties can also be manipulated from the command line. Here's an
    184 example: 
     184example:
    185185\begin{verbatim}
    186186x=[0:0.1:pi];
  • doc/theses/andrew_beach_MMath/unwinding.tex

    rda3963a r565acf59  
    1 \chapter{\texorpdfstring{Unwinding in \CFA}{Unwinding in Cforall}}
     1\chapter{Unwinding in \CFA}
    22
    33Stack unwinding is the process of removing stack frames (activations) from the
     
    110110alternate transfers of control.
    111111
    112 \section{\texorpdfstring{\CFA Implementation}{Cforall Implementation}}
     112\section{\CFA Implementation}
    113113
    114114To use libunwind, \CFA provides several wrappers, its own storage, personality
     
    182182control has returned to normal control flow.
    183183
    184 {\color{blue}PAB: Maybe a diagram would be helpful?}
     184\PAB{Maybe a diagram would be helpful?}
  • doc/theses/andrew_beach_MMath/uw-ethesis-frontpgs.tex

    rda3963a r565acf59  
    1313        \vspace*{1.0cm}
    1414
    15         \Huge
    16         {\bf Exception Handling in \CFA}
     15        {\Huge\bf Exception Handling in \CFA}
    1716
    1817        \vspace*{1.0cm}
    1918
    20         \normalsize
    2119        by \\
    2220
    2321        \vspace*{1.0cm}
    2422
    25         \Large
    26         Andrew James Beach \\
     23        {\Large Andrew James Beach} \\
    2724
    2825        \vspace*{3.0cm}
    2926
    30         \normalsize
    3127        A thesis \\
    32         presented to the University of Waterloo \\ 
     28        presented to the University of Waterloo \\
    3329        in fulfillment of the \\
    3430        thesis requirement for the degree of \\
     
    4339        \vspace*{1.0cm}
    4440
    45         \copyright\ Andrew James Beach \the\year \\
     41        \copyright{} Andrew James Beach \the\year \\
    4642        \end{center}
    4743\end{titlepage}
    4844
    49 % The rest of the front pages should contain no headers and be numbered using Roman numerals starting with `ii'
     45% The rest of the front pages should contain no headers and be numbered using
     46% Roman numerals starting with `ii'.
    5047\pagestyle{plain}
    5148\setcounter{page}{2}
    5249
    53 \cleardoublepage % Ends the current page and causes all figures and tables that have so far appeared in the input to be printed.
    54 % In a two-sided printing style, it also makes the next page a right-hand (odd-numbered) page, producing a blank page if necessary.
     50\cleardoublepage % Ends the current page and causes all figures and tables
     51% that have so far appeared in the input to be printed. In a two-sided
     52% printing style, it also makes the next page a right-hand (odd-numbered)
     53% page, producing a blank page if necessary.
    5554
    56 \begin{comment} 
     55\begin{comment}
    5756% E X A M I N I N G   C O M M I T T E E (Required for Ph.D. theses only)
    5857% Remove or comment out the lines below to remove this page
    5958\begin{center}\textbf{Examining Committee Membership}\end{center}
    6059  \noindent
    61 The following served on the Examining Committee for this thesis. The decision of the Examining Committee is by majority vote.
     60The following served on the Examining Committee for this thesis.
     61The decision of the Examining Committee is by majority vote.
    6262  \bigskip
    63  
     63
    6464  \noindent
    6565\begin{tabbing}
    6666Internal-External Member: \=  \kill % using longest text to define tab length
    67 External Examiner: \>  Bruce Bruce \\ 
     67External Examiner: \>  Bruce Bruce \\
    6868\> Professor, Dept. of Philosophy of Zoology, University of Wallamaloo \\
    69 \end{tabbing} 
     69\end{tabbing}
    7070  \bigskip
    71  
     71
    7272  \noindent
    7373\begin{tabbing}
     
    7979\end{tabbing}
    8080  \bigskip
    81  
     81
    8282  \noindent
    8383  \begin{tabbing}
     
    8787\end{tabbing}
    8888  \bigskip
    89  
     89
    9090  \noindent
    9191\begin{tabbing}
     
    9595\end{tabbing}
    9696  \bigskip
    97  
     97
    9898  \noindent
    9999\begin{tabbing}
     
    111111  % December 13th, 2006.  It is designed for an electronic thesis.
    112112 \begin{center}\textbf{Author's Declaration}\end{center}
    113  
     113
    114114 \noindent
    115 I hereby declare that I am the sole author of this thesis. This is a true copy of the thesis, including any required final revisions, as accepted by my examiners.
     115I hereby declare that I am the sole author of this thesis. This is a true copy
     116of the thesis, including any required final revisions, as accepted by my
     117examiners.
    116118
    117119  \bigskip
    118  
     120
    119121  \noindent
    120122I understand that my thesis may be made electronically available to the public.
  • doc/theses/andrew_beach_MMath/uw-ethesis.tex

    rda3963a r565acf59  
    11%======================================================================
    2 % University of Waterloo Thesis Template for LaTeX 
    3 % Last Updated November, 2020 
    4 % by Stephen Carr, IST Client Services, 
     2% University of Waterloo Thesis Template for LaTeX
     3% Last Updated November, 2020
     4% by Stephen Carr, IST Client Services,
    55% University of Waterloo, 200 University Ave. W., Waterloo, Ontario, Canada
    66% FOR ASSISTANCE, please send mail to request@uwaterloo.ca
    77
    88% DISCLAIMER
    9 % To the best of our knowledge, this template satisfies the current uWaterloo thesis requirements.
    10 % However, it is your responsibility to assure that you have met all requirements of the University and your particular department.
    11 
    12 % Many thanks for the feedback from many graduates who assisted the development of this template.
    13 % Also note that there are explanatory comments and tips throughout this template.
     9% To the best of our knowledge, this template satisfies the current uWaterloo
     10% thesis requirements. However, it is your responsibility to assure that you
     11% have met all requirements of the University and your particular department.
     12
     13% Many thanks for the feedback from many graduates who assisted the
     14% development of this template. Also note that there are explanatory comments
     15% and tips throughout this template.
    1416%======================================================================
    1517% Some important notes on using this template and making it your own...
    1618
    17 % The University of Waterloo has required electronic thesis submission since October 2006.
    18 % See the uWaterloo thesis regulations at
    19 % https://uwaterloo.ca/graduate-studies/thesis.
    20 % This thesis template is geared towards generating a PDF version optimized for viewing on an electronic display, including hyperlinks within the PDF.
    21 
    22 % DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package configuration below.
    23 % THIS INFORMATION GETS EMBEDDED IN THE PDF FINAL PDF DOCUMENT.
    24 % You can view the information if you view properties of the PDF document.
    25 
    26 % Many faculties/departments also require one or more printed copies.
    27 % This template attempts to satisfy both types of output.
     19% The University of Waterloo has required electronic thesis submission since
     20% October 2006. See the uWaterloo thesis regulations at:
     21%   https://uwaterloo.ca/graduate-studies/thesis.
     22% This thesis template is geared towards generating a PDF version optimized
     23% for viewing on an electronic display, including hyperlinks within the PDF.
     24
     25% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package
     26% configuration below. THIS INFORMATION GETS EMBEDDED IN THE FINAL PDF
     27% DOCUMENT. You can view the information if you view properties of the PDF.
     28
     29% Many faculties/departments also require one or more printed copies.
     30% This template attempts to satisfy both types of output.
    2831% See additional notes below.
    29 % It is based on the standard "book" document class which provides all necessary sectioning structures and allows multi-part theses.
    30 
    31 % If you are using this template in Overleaf (cloud-based collaboration service), then it is automatically processed and previewed for you as you edit.
    32 
    33 % For people who prefer to install their own LaTeX distributions on their own computers, and process the source files manually, the following notes provide the sequence of tasks:
    34  
     32% It is based on the standard "book" document class which provides all
     33% necessary sectioning structures and allows multi-part theses.
     34
     35% If you are using this template in Overleaf (cloud-based collaboration
     36% service), then it is automatically processed and previewed for you as you
     37% edit.
     38
     39% For people who prefer to install their own LaTeX distributions on their own
     40% computers, and process the source files manually, the following notes
     41% provide the sequence of tasks:
     42
    3543% E.g. to process a thesis called "mythesis.tex" based on this template, run:
    3644
    3745% pdflatex mythesis     -- first pass of the pdflatex processor
    3846% bibtex mythesis       -- generates bibliography from .bib data file(s)
    39 % makeindex         -- should be run only if an index is used
    40 % pdflatex mythesis     -- fixes numbering in cross-references, bibliographic references, glossaries, index, etc.
    41 % pdflatex mythesis     -- it takes a couple of passes to completely process all cross-references
    42 
    43 % If you use the recommended LaTeX editor, Texmaker, you would open the mythesis.tex file, then click the PDFLaTeX button. Then run BibTeX (under the Tools menu).
    44 % Then click the PDFLaTeX button two more times.
    45 % If you have an index as well,you'll need to run MakeIndex from the Tools menu as well, before running pdflatex
    46 % the last two times.
    47 
    48 % N.B. The "pdftex" program allows graphics in the following formats to be included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
    49 % Tip: Generate your figures and photos in the size you want them to appear in your thesis, rather than scaling them with \includegraphics options.
    50 % Tip: Any drawings you do should be in scalable vector graphic formats: SVG, PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in the final PDF as well.
     47% makeindex         -- should be run only if an index is used
     48% pdflatex mythesis     -- fixes numbering in cross-references, bibliographic
     49%                      references, glossaries, index, etc.
     50% pdflatex mythesis     -- it takes a couple of passes to completely process all
     51%                      cross-references
     52
     53% If you use the recommended LaTeX editor, Texmaker, you would open the
     54% mythesis.tex file, then click the PDFLaTeX button. Then run BibTeX (under
     55% the Tools menu). Then click the PDFLaTeX button two more times.
     56% If you have an index as well, you'll need to run MakeIndex from the Tools
     57% menu as well, before running pdflatex the last two times.
     58
     59% N.B. The "pdftex" program allows graphics in the following formats to be
     60% included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
     61% Tip: Generate your figures and photos in the size you want them to appear
     62% in your thesis, rather than scaling them with \includegraphics options.
     63% Tip: Any drawings you do should be in scalable vector graphic formats: SVG,
     64% PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in the
     65% final PDF as well.
    5166% Tip: Photographs should be cropped and compressed so as not to be too large.
    5267
    53 % To create a PDF output that is optimized for double-sided printing:
    54 % 1) comment-out the \documentclass statement in the preamble below, and un-comment the second \documentclass line.
    55 % 2) change the value assigned below to the boolean variable "PrintVersion" from " false" to "true".
    56 
    57 %======================================================================
     68% To create a PDF output that is optimized for double-sided printing:
     69% 1) comment-out the \documentclass statement in the preamble below, and
     70%    un-comment the second \documentclass line.
     71% 2) change the value assigned below to the boolean variable "PrintVersion"
     72%    from "false" to "true".
     73
     74% ======================================================================
    5875%   D O C U M E N T   P R E A M B L E
    59 % Specify the document class, default style attributes, and page dimensions, etc.
     76% Specify the document class, default style attributes, page dimensions, etc.
    6077% For hyperlinked PDF, suitable for viewing on a computer, use this:
    6178\documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
    6279
    63 % For PDF, suitable for double-sided printing, change the PrintVersion variable below to "true" and use this \documentclass line instead of the one above:
     80% For PDF, suitable for double-sided printing, change the PrintVersion
     81% variable below to "true" and use this \documentclass line instead of the
     82% one above:
    6483%\documentclass[letterpaper,12pt,titlepage,openright,twoside,final]{book}
    6584
     85\usepackage{etoolbox}
     86
    6687% Some LaTeX commands I define for my own nomenclature.
    67 % If you have to, it's easier to make changes to nomenclature once here than in a million places throughout your thesis!
     88% If you have to, it's easier to make changes to nomenclature once here than
     89% in a million places throughout your thesis!
    6890\newcommand{\package}[1]{\textbf{#1}} % package names in bold text
    69 \newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font 
    70 \newcommand{\href}[1]{#1} % does nothing, but defines the command so the print-optimized version will ignore \href tags (redefined by hyperref pkg).
    71 %\newcommand{\texorpdfstring}[2]{#1} % does nothing, but defines the command
     91\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font
     92\newcommand{\href}[1]{#1} % does nothing, but defines the command so the
     93% print-optimized version will ignore \href tags (redefined by hyperref pkg).
    7294% Anything defined here may be redefined by packages added below...
    7395
     
    7698\newboolean{PrintVersion}
    7799\setboolean{PrintVersion}{false}
    78 % CHANGE THIS VALUE TO "true" as necessary, to improve printed results for hard copies by overriding some options of the hyperref package, called below.
     100% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for
     101% hard copies by overriding some options of the hyperref package, called below.
    79102
    80103%\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
    81 \usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
    82 \usepackage[pdftex]{graphicx} % For including graphics N.B. pdftex graphics driver
     104% Lots of math symbols and environments
     105\usepackage{amsmath,amssymb,amstext}
     106% For including graphics N.B. pdftex graphics driver
     107\usepackage[pdftex]{graphicx}
     108% Removes large sections of the document.
     109\usepackage{comment}
    83110
    84111% Hyperlinks make it very easy to navigate an electronic document.
    85 % In addition, this is where you should specify the thesis title and author as they appear in the properties of the PDF document.
     112% In addition, this is where you should specify the thesis title and author as
     113% they appear in the properties of the PDF document.
    86114% Use the "hyperref" package
    87115% N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
    88116\usepackage[pdftex,pagebackref=true]{hyperref} % with basic options
    89117%\usepackage[pdftex,pagebackref=true]{hyperref}
    90                 % N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
     118% N.B. pagebackref=true provides links back from the References to the body
     119% text. This can cause trouble for printing.
    91120\hypersetup{
    92121    plainpages=false,       % needed if Roman numbers in frontpages
    93     unicode=false,          % non-Latin characters in Acrobats bookmarks
    94     pdftoolbar=true,        % show Acrobats toolbar?
    95     pdfmenubar=true,        % show Acrobats menu?
     122    unicode=false,          % non-Latin characters in Acrobat's bookmarks
     123    pdftoolbar=true,        % show Acrobat's toolbar?
     124    pdfmenubar=true,        % show Acrobat's menu?
    96125    pdffitwindow=false,     % window fit to page when opened
    97126    pdfstartview={FitH},    % fits the width of the page to the window
    98 %    pdftitle={uWaterloo\ LaTeX\ Thesis\ Template},    % title: CHANGE THIS TEXT!
     127%    pdftitle={uWaterloo\ LaTeX\ Thesis\ Template}, % title: CHANGE THIS TEXT!
    99128%    pdfauthor={Author},    % author: CHANGE THIS TEXT! and uncomment this line
    100129%    pdfsubject={Subject},  % subject: CHANGE THIS TEXT! and uncomment this line
    101 %    pdfkeywords={keyword1} {key2} {key3}, % list of keywords, and uncomment this line if desired
     130%    pdfkeywords={keyword1} {key2} {key3}, % optional list of keywords
    102131    pdfnewwindow=true,      % links in new window
    103132    colorlinks=true,        % false: boxed links; true: colored links
     
    107136    urlcolor=cyan           % color of external links
    108137}
    109 \ifthenelse{\boolean{PrintVersion}}{   % for improved print quality, change some hyperref options
     138% for improved print quality, change some hyperref options
     139\ifthenelse{\boolean{PrintVersion}}{
    110140\hypersetup{    % override some previously defined hyperref options
    111141%    colorlinks,%
     
    116146}{} % end of ifthenelse (no else)
    117147
    118 \usepackage[automake,toc,abbreviations]{glossaries-extra} % Exception to the rule of hyperref being the last add-on package
    119 % If glossaries-extra is not in your LaTeX distribution, get it from CTAN (http://ctan.org/pkg/glossaries-extra),
    120 % although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and
    121 % installation instructions there.
     148% Exception to the rule of hyperref being the last add-on package
     149\usepackage[automake,toc,abbreviations]{glossaries-extra}
     150% If glossaries-extra is not in your LaTeX distribution, get it from CTAN
     151% (http://ctan.org/pkg/glossaries-extra), although it's supposed to be in
     152% both the TeX Live and MikTeX distributions. There are also documentation
     153% and installation instructions there.
    122154
    123155% Setting up the page margins...
    124 \setlength{\textheight}{9in}\setlength{\topmargin}{-0.45in}\setlength{\headsep}{0.25in}
    125 % uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the
    126 % top, bottom, and outside page edges and a 1.125 in. (81pt) gutter margin (on binding side).
    127 % While this is not an issue for electronic viewing, a PDF may be printed, and so we have the same page layout for both printed and electronic versions, we leave the gutter margin in.
    128 % Set margins to minimum permitted by uWaterloo thesis regulations:
     156\setlength{\textheight}{9in}
     157\setlength{\topmargin}{-0.45in}
     158\setlength{\headsep}{0.25in}
     159% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at
     160% the top, bottom, and outside page edges and a 1.125 in. (81pt) gutter margin
     161% (on binding side). While this is not an issue for electronic viewing, a PDF
     162% may be printed, and so we have the same page layout for both printed and
     163% electronic versions, we leave the gutter margin in. Set margins to minimum
     164% permitted by uWaterloo thesis regulations:
    129165\setlength{\marginparwidth}{0pt} % width of margin notes
    130166% N.B. If margin notes are used, you must adjust \textwidth, \marginparwidth
    131167% and \marginparsep so that the space left between the margin notes and page
    132168% edge is less than 15 mm (0.6 in.)
    133 \setlength{\marginparsep}{0pt} % width of space between body text and margin notes
    134 \setlength{\evensidemargin}{0.125in} % Adds 1/8 in. to binding side of all
     169% width of space between body text and margin notes
     170\setlength{\marginparsep}{0pt}
     171% Adds 1/8 in. to binding side of all
    135172% even-numbered pages when the "twoside" printing option is selected
    136 \setlength{\oddsidemargin}{0.125in} % Adds 1/8 in. to the left of all pages when "oneside" printing is selected, and to the left of all odd-numbered pages when "twoside" printing is selected
    137 \setlength{\textwidth}{6.375in} % assuming US letter paper (8.5 in. x 11 in.) and side margins as above
     173\setlength{\evensidemargin}{0.125in}
     174% Adds 1/8 in. to the left of all pages when "oneside" printing is selected,
     175% and to the left of all odd-numbered pages when "twoside" printing is selected
     176\setlength{\oddsidemargin}{0.125in}
     177% assuming US letter paper (8.5 in. x 11 in.) and side margins as above
     178\setlength{\textwidth}{6.375in}
    138179\raggedbottom
    139180
    140 % The following statement specifies the amount of space between paragraphs. Other reasonable specifications are \bigskipamount and \smallskipamount.
     181% The following statement specifies the amount of space between paragraphs.
     182% Other reasonable specifications are \bigskipamount and \smallskipamount.
    141183\setlength{\parskip}{\medskipamount}
    142184
    143 % The following statement controls the line spacing. 
    144 % The default spacing corresponds to good typographic conventions and only slight changes (e.g., perhaps "1.2"), if any, should be made.
     185% The following statement controls the line spacing.
     186% The default spacing corresponds to good typographic conventions and only
     187% slight changes (e.g., perhaps "1.2"), if any, should be made.
    145188\renewcommand{\baselinestretch}{1} % this is the default line space setting
    146189
    147190% By default, each chapter will start on a recto (right-hand side) page.
    148 % We also force each section of the front pages to start on a recto page by inserting \cleardoublepage commands.
    149 % In many cases, this will require that the verso (left-hand) page be blank, and while it should be counted, a page number should not be printed.
    150 % The following statements ensure a page number is not printed on an otherwise blank verso page.
     191% We also force each section of the front pages to start on a recto page by
     192% inserting \cleardoublepage commands. In many cases, this will require that
     193% the verso (left-hand) page be blank, and while it should be counted, a page
     194% number should not be printed. The following statements ensure a page number
     195% is not printed on an otherwise blank verso page.
    151196\let\origdoublepage\cleardoublepage
    152197\newcommand{\clearemptydoublepage}{%
     
    154199\let\cleardoublepage\clearemptydoublepage
    155200
    156 % Define Glossary terms (This is properly done here, in the preamble and could also be \input{} from a separate file...)
     201% Define Glossary terms (This is properly done here, in the preamble and
     202% could also be \input{} from a separate file...)
    157203\input{glossaries}
    158204\makeglossaries
    159205
    160 \usepackage{comment}
    161206% cfa macros used in the document
    162207%\usepackage{cfalab}
     208% I'm going to bring this back eventually.
     209\makeatletter
     210% Combines all \CC* commands:
     211\newrobustcmd*\Cpp[1][\xspace]{\cfalab@Cpp#1}
     212\newcommand\cfalab@Cpp{C\kern-.1em\hbox{+\kern-.25em+}}
     213% Optional arguments do not work with pdf string. (Some fix-up required.)
     214\pdfstringdefDisableCommands{\def\Cpp{C++}}
     215\makeatother
     216
    163217\input{common}
    164 \CFAStyle                                               % CFA code-style for all languages
    165 \lstset{language=CFA,basicstyle=\linespread{0.9}\tt}    % CFA default language
     218% CFA code-style for all languages
     219\CFAStyle
     220% CFA default language
     221\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}
     222% Annotations from Peter:
     223\newcommand{\PAB}[1]{{\color{blue}PAB: #1}}
     224% Change the style of abbreviations:
     225\renewcommand{\abbrevFont}{}
    166226
    167227%======================================================================
    168228%   L O G I C A L    D O C U M E N T
    169229% The logical document contains the main content of your thesis.
    170 % Being a large document, it is a good idea to divide your thesis into several files, each one containing one chapter or other significant chunk of content, so you can easily shuffle things around later if desired.
     230% Being a large document, it is a good idea to divide your thesis into several
     231% files, each one containing one chapter or other significant chunk of content,
     232% so you can easily shuffle things around later if desired.
    171233%======================================================================
    172234\begin{document}
     
    175237% FRONT MATERIAL
    176238% title page,declaration, borrowers' page, abstract, acknowledgements,
    177 % dedication, table of contents, list of tables, list of figures, nomenclature, etc.
    178 %----------------------------------------------------------------------
    179 \input{uw-ethesis-frontpgs}
     239% dedication, table of contents, list of tables, list of figures,
     240% nomenclature, etc.
     241%----------------------------------------------------------------------
     242\input{uw-ethesis-frontpgs}
    180243
    181244%----------------------------------------------------------------------
    182245% MAIN BODY
    183246% We suggest using a separate file for each chapter of your thesis.
    184 % Start each chapter file with the \chapter command.
    185 % Only use \documentclass or \begin{document} and \end{document} commands in this master document.
     247% Start each chapter file with the \chapter command. Only use \documentclass,
     248% \begin{document} and \end{document} commands in this master document.
    186249% Tip: Putting each sentence on a new line is a way to simplify later editing.
    187250%----------------------------------------------------------------------
    188251\input{existing}
    189252\input{features}
    190 \input{unwinding}
     253\input{implement}
     254%\input{unwinding}
    191255\input{future}
    192256
     
    198262% Bibliography
    199263
    200 % The following statement selects the style to use for references. 
    201 % It controls the sort order of the entries in the bibliography and also the formatting for the in-text labels.
     264% The following statement selects the style to use for references.
     265% It controls the sort order of the entries in the bibliography and also the
     266% formatting for the in-text labels.
    202267\bibliographystyle{plain}
    203 % This specifies the location of the file containing the bibliographic information. 
    204 % It assumes you're using BibTeX to manage your references (if not, why not?).
    205 \cleardoublepage % This is needed if the "book" document class is used, to place the anchor in the correct page, because the bibliography will start on its own page.
    206 % Use \clearpage instead if the document class uses the "oneside" argument
    207 \phantomsection  % With hyperref package, enables hyperlinking from the table of contents to bibliography             
    208 % The following statement causes the title "References" to be used for the bibliography section:
     268% This specifies the location of the file containing the bibliographic
     269% information. It assumes you're using BibTeX to manage your references (if
     270% not, why not?).
     271\cleardoublepage % This is needed if the "book" document class is used, to
     272% place the anchor in the correct page, because the bibliography will start
     273% on its own page.
     274% Use \clearpage instead if the document class uses the "oneside" argument.
     275\phantomsection  % With hyperref package, enables hyperlinking from the table
     276% of contents to bibliography.
     277% The following statement causes the title "References" to be used for the
     278% bibliography section:
    209279\renewcommand*{\bibname}{References}
    210280
     
    213283
    214284\bibliography{uw-ethesis,pl}
    215 % Tip: You can create multiple .bib files to organize your references.
    216 % Just list them all in the \bibliogaphy command, separated by commas (no spaces).
    217 
    218 % The following statement causes the specified references to be added to the bibliography even if they were not cited in the text.
    219 % The asterisk is a wildcard that causes all entries in the bibliographic database to be included (optional).
     285% Tip: You can create multiple .bib files to organize your references. Just
     286% list them all in the \bibliography command, separated by commas (no spaces).
     287
     288% The following statement causes the specified references to be added to the
     289% bibliography even if they were not cited in the text. The asterisk is a
     290% wildcard that causes all entries in the bibliographic database to be
     291% included (optional).
    220292% \nocite{*}
    221293%----------------------------------------------------------------------
     
    225297% The \appendix statement indicates the beginning of the appendices.
    226298\appendix
    227 % Add an un-numbered title page before the appendices and a line in the Table of Contents
     299% Add an un-numbered title page before the appendices and a line in the Table
     300% of Contents
    228301% \chapter*{APPENDICES}
    229302% \addcontentsline{toc}{chapter}{APPENDICES}
    230 % Appendices are just more chapters, with different labeling (letters instead of numbers).
     303% Appendices are just more chapters, with different labeling (letters instead
     304% of numbers).
    231305% \input{appendix-matlab_plots.tex}
    232306
    233 % GLOSSARIES (Lists of definitions, abbreviations, symbols, etc. provided by the glossaries-extra package)
     307% GLOSSARIES (Lists of definitions, abbreviations, symbols, etc.
     308% provided by the glossaries-extra package)
    234309% -----------------------------
    235310\printglossaries
  • doc/theses/fangren_yu_COOP_F20/Report.tex

    rda3963a r565acf59  
    102102\CFA language, developed by the Programming Language Group at the University of Waterloo, has a long history, with the initial language design in 1992 by Glen Ditchfield~\cite{Ditchfield92} and the first proof-of-concept compiler built in 2003 by Richard Bilson~\cite{Bilson03}. Many new features have been added to the language over time, but the core of \CFA's type-system --- parametric functions introduced by the @forall@ clause (hence the name of the language) providing parametric overloading --- remains mostly unchanged.
    103103
    104 The current \CFA reference compiler, @cfa-cc@, is designed using the visitor pattern~\cite{vistorpattern} over an abstract syntax tree (AST), where multiple passes over the AST modify it for subsequent passes. @cfa-cc@ still includes many parts taken directly from the original Bilson implementation, which served as the starting point for this enhancement work to the type system. Unfortunately, the prior implementation did not provide the efficiency required for the language to be practical: a \CFA source file of approximately 1000 lines of code can take a multiple minutes to compile. The cause of the problem is that the old compiler used inefficient data structures and algorithms for expression resolution, which involved significant copying and redundant work.
     104The current \CFA reference compiler, @cfa-cc@, is designed using the visitor pattern~\cite{vistorpattern} over an abstract syntax tree (AST), where multiple passes over the AST modify it for subsequent passes. @cfa-cc@ still includes many parts taken directly from the original Bilson implementation, which served as the starting point for this enhancement work to the type system. Unfortunately, the prior implementation did not provide the efficiency required for the language to be practical: a \CFA source file of approximately 1000 lines of code can take multiple minutes to compile. The cause of the problem is that the old compiler used inefficient data structures and algorithms for expression resolution, which involved significant copying and redundant work.
    105105
    106106This report presents a series of optimizations to the performance-critical parts of the resolver, with a major rework of the compiler data-structures using a functional-programming approach to reduce memory complexity. The improvements were suggested by running the compiler builds with a performance profiler against the \CFA standard-library source-code and a test suite to find the most underperforming components in the compiler algorithm.
     
    122122\end{itemize}
    123123
    124 The resolver algorithm, designed for overload resolution, uses a significant amount of reused, and hence copying, for the intermediate representations, especially in the following two places:
     124The resolver algorithm, designed for overload resolution, allows a significant amount of code reuse, and hence copying, for the intermediate representations, especially in the following two places:
    125125\begin{itemize}
    126126\item
     
    301301forall( dtype T | sized( T ) )
    302302T * malloc( void ) { return (T *)malloc( sizeof(T) ); } // call C malloc
    303 int * i = malloc();  // type deduced from left-hand size $\Rightarrow$ no size argument or return cast
     303int * i = malloc();  // type deduced from left-hand side $\(\Rightarrow\)$ no size argument or return cast
    304304\end{cfa}
    305305An unbound return-type is problematic in resolver complexity because a single match of a function call with an unbound return type may create multiple candidates. In the worst case, consider a function declared that returns any @otype@ (defined \VPageref{otype}):
     
    432432\begin{cfa}
    433433void f( int );
    434 double g$_1$( int );
    435 int g$_2$( long );
     434double g$\(_1\)$( int );
     435int g$\(_2\)$( long );
    436436f( g( 42 ) );
    437437\end{cfa}
  • doc/theses/thierry_delisle_PhD/thesis/Makefile

    rda3963a r565acf59  
    3232        emptytree \
    3333        fairness \
     34        io_uring \
     35        pivot_ring \
    3436        system \
    3537}
     
    4345## Define the documents that need to be made.
    4446all: thesis.pdf
    45 thesis.pdf: ${TEXTS} ${FIGURES} ${PICTURES} glossary.tex local.bib
     47thesis.pdf: ${TEXTS} ${FIGURES} ${PICTURES} thesis.tex glossary.tex local.bib
    4648
    4749DOCUMENT = thesis.pdf
     
    105107        sed -i 's/$@/${Build}\/$@/g' ${Build}/$@_t
    106108
     109build/fairness.svg: fig/fairness.py | ${Build}
     110        python3 fig/fairness.py build/fairness.svg
     111
    107112## pstex with inverted colors
    108113%.dark.pstex : fig/%.fig Makefile | ${Build}
  • doc/theses/thierry_delisle_PhD/thesis/local.bib

    rda3963a r565acf59  
    512512}
    513513
     514@manual{MAN:bsd/kqueue,
     515  title = {KQUEUE(2) - FreeBSD System Calls Manual},
     516  url   = {https://www.freebsd.org/cgi/man.cgi?query=kqueue},
     517  year  = {2020},
     518  month = {may}
     519}
     520
    514521% Apple's MAC OS X
    515522@manual{MAN:apple/scheduler,
     
    577584
    578585% --------------------------------------------------
     586% Man Pages
     587@manual{MAN:open,
     588  key        = "open",
     589  title      = "open(2) Linux User's Manual",
     590  year       = "2020",
     591  month      = "February",
     592}
     593
     594@manual{MAN:accept,
     595  key        = "accept",
     596  title      = "accept(2) Linux User's Manual",
     597  year       = "2019",
     598  month      = "March",
     599}
     600
     601@manual{MAN:select,
     602  key        = "select",
     603  title      = "select(2) Linux User's Manual",
     604  year       = "2019",
     605  month      = "March",
     606}
     607
     608@manual{MAN:poll,
     609  key        = "poll",
     610  title      = "poll(2) Linux User's Manual",
     611  year       = "2019",
     612  month      = "July",
     613}
     614
     615@manual{MAN:epoll,
     616  key        = "epoll",
     617  title      = "epoll(7) Linux User's Manual",
     618  year       = "2019",
     619  month      = "March",
     620}
     621
     622@manual{MAN:aio,
     623  key        = "aio",
     624  title      = "aio(7) Linux User's Manual",
     625  year       = "2019",
     626  month      = "March",
     627}
     628
     629@misc{MAN:io_uring,
     630  title   = {Efficient IO with io\_uring},
     631  author  = {Axboe, Jens},
     632  year    = "2019",
     633  month   = "March",
     634  version = {0,4},
     635  howpublished = {\url{https://kernel.dk/io_uring.pdf}}
     636}
     637
     638% --------------------------------------------------
    579639% Wikipedia Entries
    580640@misc{wiki:taskparallel,
     
    617677  note = "[Online; accessed 2-January-2021]"
    618678}
     679
     680@misc{wiki:future,
     681  author = "{Wikipedia contributors}",
     682  title = "Futures and promises --- {W}ikipedia{,} The Free Encyclopedia",
     683  year = "2020",
     684  url = "https://en.wikipedia.org/wiki/Futures_and_promises",
     685  note = "[Online; accessed 9-February-2021]"
     686}
  • doc/theses/thierry_delisle_PhD/thesis/text/core.tex

    rda3963a r565acf59  
    4949
    5050\section{Design}
    51 In general, a na\"{i}ve \glsxtrshort{fifo} ready-queue does not scale with increased parallelism from \glspl{hthrd}, resulting in decreased performance. The problem is adding/removing \glspl{thrd} is a single point of contention. As shown in the evaluation sections, most production schedulers do scale when adding \glspl{hthrd}. The common solution to the single point of contention is to shard the ready-queue so each \gls{hthrd} can access the ready-queue without contention, increasing performance though lack of contention.
     51In general, a na\"{i}ve \glsxtrshort{fifo} ready-queue does not scale with increased parallelism from \glspl{hthrd}, resulting in decreased performance. The problem is adding/removing \glspl{thrd} is a single point of contention. As shown in the evaluation sections, most production schedulers do scale when adding \glspl{hthrd}. The common solution to the single point of contention is to shard the ready-queue so each \gls{hthrd} can access the ready-queue without contention, increasing performance.
    5252
    5353\subsection{Sharding} \label{sec:sharding}
    54 An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm presents a queue with a relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each \emph{cell} of the array has a timestamp for the last operation and a pointer to a linked-list with a lock and each node in the list is marked with a timestamp indicating when it is added to the list. A push operation is done by picking a random cell, acquiring the list lock, and pushing to the list. If the cell is locked, the operation is simply retried on another random cell until a lock is acquired. A pop operation is done in a similar fashion except two random cells are picked. If both cells are unlocked with non-empty lists, the operation pops the node with the oldest cell timestamp. If one of the cells is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new random cells and tries again.
     54An interesting approach to sharding a queue is presented in \cit{Trevors paper}. This algorithm presents a queue with a relaxed \glsxtrshort{fifo} guarantee using an array of strictly \glsxtrshort{fifo} sublists as shown in Figure~\ref{fig:base}. Each \emph{cell} of the array has a timestamp for the last operation and a pointer to a linked-list with a lock. Each node in the list is marked with a timestamp indicating when it is added to the list. A push operation is done by picking a random cell, acquiring the list lock, and pushing to the list. If the cell is locked, the operation is simply retried on another random cell until a lock is acquired. A pop operation is done in a similar fashion except two random cells are picked. If both cells are unlocked with non-empty lists, the operation pops the node with the oldest timestamp. If one of the cells is unlocked and non-empty, the operation pops from that cell. If both cells are either locked or empty, the operation picks two new random cells and tries again.
    5555
    5656\begin{figure}
     
    100100\paragraph{Local Information} Figure~\ref{fig:emptytls} shows an approach using dense information, similar to the bitmap, but each \gls{hthrd} keeps its own independent copy. While this approach can offer good scalability \emph{and} low latency, the liveliness and discovery of the information can become a problem. This case is made worse in systems with few processors where even blind random picks can find \glspl{thrd} in a few tries.
    101101
    102 I built a prototype of these approaches and none of these techniques offer satisfying performance when few threads are present. All of these approach hit the same 2 problems. First, randomly picking sub-queues is very fast but means any improvement to the hit rate can easily be countered by a slow-down in look-up speed when there are empty lists. Second, the array is already as sharded to avoid contention bottlenecks, so any denser data structure tends to become a bottleneck. In all cases, these factors meant the best cases scenario, \ie many threads, would get worst throughput, and the worst-case scenario, few threads, would get a better hit rate, but an equivalent poor throughput. As a result I tried an entirely different approach.
     102I built a prototype of these approaches and none of these techniques offer satisfying performance when few threads are present. All of these approaches hit the same two problems. First, randomly picking sub-queues is very fast. That speed means any improvement to the hit rate can easily be countered by a slow-down in look-up speed, whether or not there are empty lists. Second, the array is already sharded to avoid contention bottlenecks, so any denser data structure tends to become a bottleneck. In all cases, these factors meant the best-case scenario, \ie many threads, would get worse throughput, and the worst-case scenario, few threads, would get a better hit rate, but an equivalently poor throughput. As a result I tried an entirely different approach.
    103103
    104104\subsection{Dynamic Entropy}\cit{https://xkcd.com/2318/}
    105 In the worst-case scenario there are only few \glspl{thrd} ready to run, or more precisely given $P$ \glspl{proc}\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}, $T$ \glspl{thrd} and $\epsilon$ a very small number, than the worst case scenario can be represented by $\epsilon \ll P$, than $T = P + \epsilon$. It is important to note in this case that fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' on page \pageref{q:LinuxCFS}. In this context, it is possible to use a purely internal-locality based approach and still meet the fairness requirements. This approach simply has each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} pushes to a given sub-queue and then popes from the \emph{same} subqueue. In cases where $T \gg P$, the scheduler should also achieves similar performance without affecting the fairness guarantees.
     105In the worst-case scenario there are only a few \glspl{thrd} ready to run, or more precisely given $P$ \glspl{proc}\footnote{For simplicity, this assumes there is a one-to-one match between \glspl{proc} and \glspl{hthrd}.}, $T$ \glspl{thrd} and $\epsilon$ a very small number, then the worst-case scenario can be represented by $T = P + \epsilon$, with $\epsilon \ll P$. It is important to note in this case that fairness is effectively irrelevant. Indeed, this case is close to \emph{actually matching} the model of the ``Ideal multi-tasking CPU'' on page \pageref{q:LinuxCFS}. In this context, it is possible to use a purely internal-locality based approach and still meet the fairness requirements. This approach simply has each \gls{proc} running a single \gls{thrd} repeatedly. Or from the shared ready-queue viewpoint, each \gls{proc} pushes to a given sub-queue and then pops from the \emph{same} subqueue. The challenge is for the scheduler to achieve good performance in both the $T = P + \epsilon$ case and the $T \gg P$ case, without affecting the fairness guarantees in the latter.
    106106
    107 To handle this case, I use a pseudo random-number generator, \glsxtrshort{prng} in a novel way. When the scheduler uses a \glsxtrshort{prng} instance per \gls{proc} exclusively, the random-number seed effectively starts an encoding that produces a list of all accessed subqueues, from latest to oldest. The novel approach is to be able to ``replay'' the \glsxtrshort{prng} backwards and there exist \glsxtrshort{prng}s that are fast, compact \emph{and} can be run forward and backwards. Linear congruential generators~\cite{wiki:lcg} are an example of \glsxtrshort{prng}s that match these requirements.
     107To handle this case, I use a \glsxtrshort{prng}\todo{Fix missing long form} in a novel way. There exist \glsxtrshort{prng}s that are fast, compact and can be run forward \emph{and} backwards.  Linear congruential generators~\cite{wiki:lcg} are an example of such \glsxtrshort{prng}s. The novel approach is to use the ability to run backwards to ``replay'' the \glsxtrshort{prng}. When the scheduler uses an exclusive \glsxtrshort{prng} instance per \gls{proc}, the random-number seed effectively starts an encoding that produces a list of all accessed subqueues, from latest to oldest. Replaying the \glsxtrshort{prng} identifies cells that were accessed recently and that probably still have data cached.
    108108
    109109The algorithm works as follows:
  • doc/theses/thierry_delisle_PhD/thesis/text/intro.tex

    rda3963a r565acf59  
    77While previous work on the concurrent package of \CFA focused on features and interfaces, this thesis focuses on performance, introducing \glsxtrshort{api} changes only when required by performance considerations. More specifically, this thesis concentrates on scheduling and \glsxtrshort{io}. Prior to this work, the \CFA runtime used a strictly \glsxtrshort{fifo} \gls{rQ}.
    88
    9 This work exclusively concentrates on Linux as it's operating system since the existing \CFA runtime and compiler does not already support other operating systems. Furthermore, as \CFA is yet to be released, supporting version of Linux older that the latest version is not a goal of this work.
     9This work exclusively concentrates on Linux as its operating system since the existing \CFA runtime and compiler do not already support other operating systems. Furthermore, as \CFA is yet to be released, supporting versions of Linux older than the latest version is not a goal of this work.
  • doc/theses/thierry_delisle_PhD/thesis/text/io.tex

    rda3963a r565acf59  
    1 \chapter{User Level \glsxtrshort{io}}
    2 As mentionned in Section~\ref{prev:io}, User-Level \glsxtrshort{io} requires multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc} using asynchronous \glsxtrshort{io} operations. Various operating systems offer various forms of asynchronous operations and as mentioned in Chapter~\ref{intro}, this work is exclusively focuesd on Linux.
     1\chapter{User Level \io}
     2As mentioned in Section~\ref{prev:io}, User-Level \io requires multiplexing the \io operations of many \glspl{thrd} onto fewer \glspl{proc} using asynchronous \io operations. Various operating systems offer various forms of asynchronous operations and as mentioned in Chapter~\ref{intro}, this work is exclusively focused on Linux.
    33
    4 \section{Existing options}
    5 Since \glsxtrshort{io} operations are generally handled by the
     4\section{Kernel Interface}
     5Since this work fundamentally depends on operating system support, the first step of any design is to discuss the available interfaces and pick one (or more) as the foundations of the \io subsystem.
    66
    7 \subsection{\lstinline|epoll|, \lstinline|poll| and \lstinline|select|}
     7\subsection{\lstinline|O_NONBLOCK|}
     8In Linux, files can be opened with the flag @O_NONBLOCK@~\cite{MAN:open} (or @SOCK_NONBLOCK@~\cite{MAN:accept}, the equivalent for sockets) to use the file descriptors in ``nonblocking mode''. In this mode, ``Neither the open() nor any subsequent \io operations on the [opened file descriptor] will cause the calling
     9process to wait.'' This feature can be used as the foundation for the \io subsystem. However, for the subsystem to be able to block \glspl{thrd} until an operation completes, @O_NONBLOCK@ must be used in conjunction with a system call that monitors when a file descriptor becomes ready, \ie, the next \io operation on it will not cause the process to wait\footnote{In this context, ready means that \emph{some} operation can be performed without blocking. It does not mean that the last operation that returned \lstinline|EAGAIN| will succeed on the next try. A file that is ready to read but has only 1 byte available would be an example of this distinction.}.
    810
    9 \subsection{Linux's AIO}
     11There are three options to monitor file descriptors in Linux\footnote{For simplicity, this section omits to mention \lstinline|pselect| and \lstinline|ppoll|. The difference between these system calls and \lstinline|select| and \lstinline|poll| respectively is not relevant for this discussion.}, @select@~\cite{MAN:select}, @poll@~\cite{MAN:poll} and @epoll@~\cite{MAN:epoll}. All three of these options offer a system call that blocks a \gls{kthrd} until at least one of many file descriptors becomes ready. The group of file descriptors being waited on is often referred to as the \newterm{interest set}.
    1012
     13\paragraph{\lstinline|select|} is the oldest of these options. It takes as input a contiguous array of bits, where each bit represents a file descriptor of interest. On return, it modifies the set in place to identify which of the file descriptors changed status. This means that calling select in a loop requires re-initializing the array each time and the number of file descriptors supported has a hard limit. Another limit of @select@ is that once the call is started, the interest set can no longer be modified. Monitoring a new file descriptor generally requires aborting any in-progress call to @select@\footnote{Starting a new call to \lstinline|select| in this case is possible but requires a distinct kernel thread, and as a result is not an acceptable multiplexing solution when the interest set is large and highly dynamic unless the number of parallel calls to select can be strictly bounded.}.
    1114
     15\paragraph{\lstinline|poll|} is an improvement over select, which removes the hard limit on the number of file descriptors and the need to re-initialize the input on every call. It works using an array of structures as an input rather than an array of bits, thus allowing a more compact input for small interest sets. Like @select@, @poll@ suffers from the limitation that the interest set cannot be changed while the call is blocked.
     16
     17\paragraph{\lstinline|epoll|} further improves on these two functions, by allowing the interest set to be dynamically added to and removed from while a \gls{kthrd} is blocked on a call to @epoll@. This is done by creating an \emph{epoll instance} with a persistent interest set that is used across multiple calls. This advantage significantly reduces synchronization overhead on the part of the caller (in this case the \io subsystem) since the interest set can be modified when adding or removing file descriptors without having to synchronize with other \glspl{kthrd} potentially calling @epoll@.
     18
     19However, all three of these system calls suffer from generality problems to some extent. The man page for @O_NONBLOCK@ mentions that ``[@O_NONBLOCK@] has no effect for regular files and block devices'', which means none of these three system calls are viable multiplexing strategies for these types of \io operations. Furthermore, @epoll@ has been shown to have some problems with pipes and ttys\cit{Peter's examples in some fashion}. Finally, none of these are useful solutions for multiplexing \io operations that do not have a corresponding file descriptor and can be awkward for operations using multiple file descriptors.
     20
     21\subsection{The POSIX asynchronous I/O (AIO)}
     22An alternative to using @O_NONBLOCK@ is to use the AIO interface. Its interface lets programmers enqueue operations to be performed asynchronously by the kernel. Completions of these operations can be communicated in various ways, either by sending a Linux signal, spawning a new \gls{kthrd} or by polling for completion of one or more operations. For the purpose of multiplexing operations, spawning a new \gls{kthrd} is counter-productive but a related solution is discussed in Section~\ref{io:morethreads}. Since using interrupt handlers can also lead to fairly complicated interactions between subsystems, I will concentrate on the different polling methods. AIO only supports read and write operations to file descriptors and those do not have the same limitation as @O_NONBLOCK@, \ie, the file descriptors can be regular files and block devices. It also supports batching more than one of these operations in a single system call.
     23
     24AIO offers two different approaches to polling. @aio_error@ can be used as a spinning form of polling, returning @EINPROGRESS@ until the operation is completed, and @aio_suspend@ can be used similarly to @select@, @poll@ or @epoll@, to wait until one or more requests have completed. For the purpose of \io multiplexing, @aio_suspend@ is the intended interface. Even if AIO requests can be submitted concurrently, @aio_suspend@ suffers from the same limitation as @select@ and @poll@, \ie, the interest set cannot be dynamically changed while a call to @aio_suspend@ is in progress. Unlike @select@ and @poll@ however, it also suffers from the limitation that it does not specify which requests have completed, meaning programmers then have to poll each request in the interest set using @aio_error@ to identify which requests have completed. This means that, like @select@ and @poll@ but not @epoll@, the time needed to examine polling results increases based on the total number of requests monitored, not the number of completed requests.
     25
     26AIO does not seem to be a particularly popular interface, which I believe is in part due to this less than ideal polling interface. Linus Torvalds talks about this interface as follows :
    1227
    1328\begin{displayquote}
     
    3045in
    3146``some kind of arbitrary \textit{queue up asynchronous system call} model''.
    32 This description is actually quite close to the interface of the interface described in the next section.
     47This description is actually quite close to the interface described in the next section.
    3348
    34 \subsection{\texttt{io\_uring}}
    35 A very recent addition to Linux, @io_uring@\cit{io\_uring} is a framework that aims to solve many of the problems listed with the above mentioned solutions.
     49\subsection{\lstinline|io_uring|}
     50A very recent addition to Linux, @io_uring@\cite{MAN:io_uring} is a framework that aims to solve many of the problems listed with the above mentioned interfaces. Like AIO, it represents \io operations as entries added on a queue. But like @epoll@, new requests can be submitted while a blocking call waiting for requests to complete is already in progress. The @io_uring@ interface uses two ring buffers (referred to simply as rings) as its core, a submit ring to which programmers push \io requests and a completion buffer which programmers poll for completion.
     51
     52One of the big advantages over the interfaces listed above is that it also supports a much wider range of operations. In addition to supporting reads and writes to any file descriptor like AIO, it supports other operations like @open@, @close@, @fsync@, @accept@, @connect@, @send@, @recv@, @splice@, \etc.
     53
     54On top of these, @io_uring@ adds many ``bells and whistles'' like avoiding copies between the kernel and user-space with shared memory, allowing different mechanisms to communicate with device drivers and supporting chains of requests, \ie, requests that automatically trigger followup requests on completion.
    3655
    3756\subsection{Extra Kernel Threads}\label{io:morethreads}
    38 Finally, if the operating system does not offer any satisfying forms of asynchronous \glsxtrshort{io} operations, a solution is to fake it by creating a pool of \glspl{kthrd} and delegating operations to them in order to avoid blocking \glspl{proc}.
     57Finally, if the operating system does not offer any satisfying forms of asynchronous \io operations, a solution is to fake it by creating a pool of \glspl{kthrd} and delegating operations to them in order to avoid blocking \glspl{proc}. This approach is a compromise on multiplexing. In the worst case, where all \glspl{thrd} are consistently blocking on \io, it devolves into 1-to-1 threading. However, regardless of the frequency of \io operations, it achieves the fundamental goal of not blocking \glspl{proc} when \glspl{thrd} are ready to run. This approach is used by languages like Go\cit{Go} and frameworks like libuv\cit{libuv}, since it has the advantage that it can easily be used across multiple operating systems. This advantage is especially relevant for languages like Go, which offer a homogeneous \glsxtrshort{api} across all platforms. In contrast, C has a very limited standard \glsxtrshort{api} for \io, \eg, the C standard library has no networking.
    3958
    4059\subsection{Discussion}
     60These options effectively fall into two broad camps of solutions, waiting for \io to be ready versus waiting for \io to be completed. All operating systems that support asynchronous \io must offer an interface along one of these lines, but the details can vary drastically. For example, Free BSD offers @kqueue@~\cite{MAN:bsd/kqueue} which behaves similarly to @epoll@ but with some small quality of life improvements, while Windows (Win32)~\cit{https://docs.microsoft.com/en-us/windows/win32/fileio/synchronous-and-asynchronous-i-o} offers ``overlapped I/O'' which handles submissions similarly to @O_NONBLOCK@, with extra flags on the synchronous system call, but waits for completion events, similarly to @io_uring@.
    4161
     62For this project, I have chosen to use @io_uring@, in large parts due to its generality. While @epoll@ has been shown to be a good solution to socket \io (\cite{DBLP:journals/pomacs/KarstenB20}), @io_uring@'s transparent support for files, pipes and more complex operations, like @splice@ and @tee@, make it a better choice as the foundation for a general \io subsystem.
    4263
    4364\section{Event-Engine}
    4465
     66The event engine's responsibility is to use the kernel interface to multiplex many \io operations onto few \glspl{kthrd}. In concrete terms, this means that \glspl{thrd} enter the engine through an interface, the event engine then starts the operation and parks the calling \glspl{thrd}, returning control to the \gls{proc}. The parked \glspl{thrd} are then rescheduled by the event engine once the desired operation has completed.
     67
     68\subsection{\lstinline|io_uring| in depth}
     69Before going into details on the design of the event engine, I will present some more details on the usage of @io_uring@ which are important for the design of the engine.
     70
     71\begin{figure}
     72        \centering
     73        \input{io_uring.pstex_t}
     73        \caption[Overview of \lstinline|io_uring|]{Overview of \lstinline|io_uring| \smallskip\newline Two ring buffers are used to communicate with the kernel, one for completions~(right) and one for submissions~(left). The completion ring contains entries, \newterm{CQE}s: Completion Queue Entries, that are produced by the kernel when an operation completes and then consumed by the application. On the other hand, the application produces \newterm{SQE}s: Submit Queue Entries, which it appends to the submission ring for the kernel to consume. Unlike the completion ring, the submission ring does not contain the entries directly, it indexes into the SQE array (denoted \emph{S}) instead.}
     75        \label{fig:iouring}
     76\end{figure}
     77
     78Figure~\ref{fig:iouring} shows an overview of an @io_uring@ instance. Multiple @io_uring@ instances can be created, in which case they each have a copy of the data structures in the figure. New \io operations are submitted to the kernel following 4 steps which use the components shown in the figure.
     79
     80\paragraph{First} an @sqe@ must be allocated from the pre-allocated array (denoted \emph{S} in Figure~\ref{fig:iouring}). This array is created at the same time as the @io_uring@ instance, is in kernel-locked memory, which means it is both visible by the kernel and the application, and has a fixed size determined at creation. How these entries are allocated is not important for the functioning of @io_uring@, the only requirement is that no entry is reused before the kernel has consumed it.
     81
     82\paragraph{Secondly} the @sqe@ must be filled according to the desired operation. This step is straightforward, the only detail worth mentioning is that @sqe@s have a @user_data@ field that must be filled in order to match submission and completion entries.
     83
     84\paragraph{Thirdly} the @sqe@ must be submitted to the submission ring, this requires appending the index of the @sqe@ to the ring following regular ring buffer steps: \lstinline|{ buffer[head] = item; head++ }|. Since the head is visible to the kernel, some memory barriers may be required to prevent the compiler from reordering these operations. Since the submission ring is a regular ring buffer, more than one @sqe@ can be added at once and the head can be updated only after the entire batch has been updated.
     85
     86\paragraph{Finally} the kernel must be notified of the change to the ring using the system call @io_uring_enter@. The number of elements appended to the submission ring is passed as a parameter and the number of elements consumed is returned. The @io_uring@ instance can be constructed so that this step is not required, but this requires elevated privilege and early versions of @io_uring@ had additional restrictions.
     87
     88The completion side is simpler, applications call @io_uring_enter@ with the flag @IORING_ENTER_GETEVENTS@ to wait on a desired number of operations to complete. The same call can be used to both submit @sqe@s and wait for operations to complete. When operations do complete the kernel appends a @cqe@ to the completion ring and advances the head of the ring. Each @cqe@ contains the result of the operation as well as a copy of the @user_data@ field of the @sqe@ that triggered the operation. It is not necessary to call @io_uring_enter@ to get new events, the kernel can directly modify the completion ring, the system call is only needed if the application wants to block waiting on operations to complete.
     89
     90The @io_uring_enter@ system call is protected by a lock inside the kernel. This means that concurrent calls to @io_uring_enter@ using the same instance are possible, but there can be no performance gained from parallel calls to @io_uring_enter@. It is possible to do the first three submission steps in parallel, however, doing so requires careful synchronization.
     91
     92@io_uring@ also introduces some constraints on the number of operations that can be ``in flight'' at the same time. Obviously, @sqe@s are allocated from a fixed-size array, meaning that there is a hard limit to how many @sqe@s can be submitted at once. In addition, the @io_uring_enter@ system call can fail because ``The  kernel [...] ran out of resources to handle [a request]'' or ``The application is attempting to overcommit the number of requests it can  have  pending.''. This requirement means that it can be required to handle bursts of \io requests by holding back some of the requests so they can be submitted at a later time.
     93
     94\subsection{Multiplexing \io: Submission}
     95The submission side is the most complicated aspect of @io_uring@ and from the design decisions made in the submission side, the completion side effectively follows.
     96
     97While it is possible to do the first steps of submission in parallel, the duration of the system call scales with number of entries submitted. The consequence of this is that how much parallelism can be used to prepare submissions for the next system call is limited. Beyond this limit, the length of the system call will be the throughput limiting factor. I have concluded from early experiments that preparing submissions seems to take about as long as the system call itself, which means that with a single @io_uring@ instance, there is no benefit in terms of \io throughput to having more than two \glspl{hthrd}. Therefore the design of the submission engine must manage multiple instances of @io_uring@ running in parallel, effectively sharding @io_uring@ instances. Similarly to scheduling, this sharding can be done privately, \ie, one instance per \glspl{proc}, or in decoupled pools, \ie, a pool of \glspl{proc} use a pool of @io_uring@ instances without one-to-one coupling between any given instance and any given \gls{proc}.
     98
     99\subsubsection{Pool of Instances}
     100One approach is to have multiple shared instances. \Glspl{thrd} attempting \io operations pick one of the available instances and submit operations to that instance. Since the completion will be sent to the same instance, all instances with pending operations must be polled continuously\footnote{As will be described in Chapter~\ref{practice}, this does not translate into constant cpu usage.}. Since there is no coupling between \glspl{proc} and @io_uring@ instances in this approach, \glspl{thrd} running on more than one \gls{proc} can attempt to submit to the same instance concurrently. Since @io_uring@ effectively sets the amount of sharding needed to avoid contention on its internal locks, performance in this approach is based on two aspects: the synchronization needed to submit does not induce more contention than @io_uring@ already does and the scheme to route \io requests to specific @io_uring@ instances does not introduce contention. This second aspect has an oversized importance because it comes into play before the sharding of instances, and as such, all \glspl{hthrd} can contend on the routing algorithm.
     101
     102Allocation in this scheme can be handled fairly easily. Free @sqe@s, \ie, @sqe@s that aren't currently being used to represent a request, can be written to safely and have a field called @user_data@ which the kernel only reads to copy to @cqe@s. Allocation also requires no ordering guarantee as all free @sqe@s are interchangeable. This requires a simple concurrent bag. The only added complexity is that the number of @sqe@s is fixed, which means allocation can fail. This failure needs to be pushed up to the routing algorithm, \glspl{thrd} attempting \io operations must not be directed to @io_uring@ instances without any available @sqe@s. Ideally, the routing algorithm would block operations up-front if none of the instances have available @sqe@s.
     103
     104Once an @sqe@ is allocated, \glspl{thrd} can fill it normally, they simply need to keep track of the @sqe@ index and which instance it belongs to.
     105
     106Once an @sqe@ is filled in, what needs to happen is that the @sqe@ must be added to the submission ring buffer, an operation that is not thread-safe by itself, and the kernel must be notified using the @io_uring_enter@ system call. The submission ring buffer is the same size as the pre-allocated @sqe@ buffer, therefore pushing to the ring buffer cannot fail\footnote{This is because it is invalid to have the same \lstinline|sqe| multiple times in the ring buffer.}. However, as mentioned, the system call itself can fail with the expectation that it will be retried once some of the already submitted operations complete. Since multiple @sqe@s can be submitted to the kernel at once, it is important to strike a balance between batching and latency. Operations that are ready to be submitted should be batched together in few system calls, but at the same time, operations should not be left pending for long periods of time before being submitted. This can be handled by either designating one of the submitting \glspl{thrd} as being responsible for the system call for the current batch of @sqe@s or by having some other party regularly submitting all ready @sqe@s, \eg, the poller \gls{thrd} mentioned later in this section.
     107
     108In the case of designating a \gls{thrd}, ideally, when multiple \glspl{thrd} attempt to submit operations to the same @io_uring@ instance, all requests would be batched together and one of the \glspl{thrd} would do the system call on behalf of the others, referred to as the \newterm{submitter}. In practice however, it is important that the \io requests are not left pending indefinitely and as such, it may be required to have a current submitter and a next submitter. Indeed, as long as there is a ``next'' submitter, \glspl{thrd} submitting new \io requests can move on, knowing that some future system call will include their request. Once the system call is done, the submitter must also free @sqe@s so that the allocator can reuse them.
     109
     110Finally, the completion side is much simpler since the @io_uring@ system call enforces a natural synchronization point. Polling simply needs to regularly do the system call, go through the produced @cqe@s and communicate the result back to the originating \glspl{thrd}. Since @cqe@s only own a signed 32 bit result, in addition to the copy of the @user_data@ field, all that is needed to communicate the result is a simple future~\cite{wiki:future}. If the submission side does not designate submitters, polling can also submit all @sqe@s as it is polling events.  A simple approach to polling is to allocate a \gls{thrd} per @io_uring@ instance and simply let the poller \glspl{thrd} poll their respective instances when scheduled. This design is especially convenient for reasons explained in Chapter~\ref{practice}.
     111
     112With this pool of instances approach, the big advantage is that it is fairly flexible. It does not impose restrictions on what \glspl{thrd} submitting \io operations can and cannot do between allocations and submissions. It also can gracefully handle running out of resources, @sqe@s or the kernel returning @EBUSY@. The down side to this is that many of the steps used for submitting need complex synchronization to work properly. The routing and allocation algorithm needs to keep track of which ring instances have available @sqe@s, block incoming requests if no instance is available, prevent barging if \glspl{thrd} are already queued up waiting for @sqe@s and handle @sqe@s being freed. The submission side needs to safely append @sqe@s to the ring buffer, make sure no @sqe@ is dropped or left pending forever, notify the allocation side when @sqe@s can be reused and handle the kernel returning @EBUSY@. Sharding the @io_uring@ instances should alleviate much of the contention caused by this, but all this synchronization may still have non-zero cost.
     113
     114\subsubsection{Private Instances}
     115Another approach is to simply create one ring instance per \gls{proc}. This alleviates the need for synchronization on the submissions, requiring only that \glspl{thrd} are not interrupted in between two submission steps. This is effectively the same requirement as using @thread_local@ variables. Since @sqe@s that are allocated must be submitted to the same ring, on the same \gls{proc}, this effectively forces the application to submit @sqe@s in allocation order\footnote{The actual requirement is that \glspl{thrd} cannot context switch between allocation and submission. This requirement means that from the subsystem's point of view, the allocation and submission are sequential. To remove this requirement, a \gls{thrd} would need the ability to ``yield to a specific \gls{proc}'', \ie, park with the promise that it will be run next on a specific \gls{proc}, the \gls{proc} attached to the correct ring. This is not a current or planned feature of \CFA.}, greatly simplifying both allocation and submission. In this design, allocation and submission form a partitioned ring buffer as shown in Figure~\ref{fig:pring}. Once added to the ring buffer, the attached \gls{proc} has a significant amount of flexibility with regards to when to do the system call. Possible options are: when the \gls{proc} runs out of \glspl{thrd} to run, after running a given number of \glspl{thrd}, etc.
     116
     117\begin{figure}
     118        \centering
     119        \input{pivot_ring.pstex_t}
     120        \caption[Partitioned ring buffer]{Partitioned ring buffer \smallskip\newline Allocated sqes are appended to the first partition. When submitting, the partition is simply advanced to include all the sqes that should be submitted. The kernel considers the partition as the head of the ring.}
     121        \label{fig:pring}
     122\end{figure}
     123
     124This approach has the advantage that it does not require much of the synchronization needed in the shared approach. This comes at the cost that \glspl{thrd} submitting \io operations have less flexibility, they cannot park or yield, and several exceptional cases are handled poorly. Instances running out of @sqe@s cannot run \glspl{thrd} wanting to do \io operations, in such a case the \gls{thrd} needs to be moved to a different \gls{proc}, the only current way of achieving this would be to @yield()@ hoping to be scheduled on a different \gls{proc}, which is not guaranteed. Another problematic case is that \glspl{thrd} that do not park for long periods of time will delay the submission of any @sqe@ not already submitted. This issue is similar to the fairness issues of schedulers that use work-stealing, mentioned in the previous chapter.
     125
     126
    45127
    46128\section{Interface}
     129Finally, the last important part of the \io subsystem is its interface. There are multiple approaches that can be offered to programmers, each with advantages and disadvantages. The new \io subsystem can replace the C runtime's API or extend it. In the latter case, the interface can go from very similar to vastly different. The following sections discuss some useful options using @read@ as an example. The standard Linux interface for C is :
     130
     131@ssize_t read(int fd, void *buf, size_t count);@.
     132
     133\subsection{Replacement}
     134Replacing the C \glsxtrshort{api}
     135
     136\subsection{Synchronous Extension}
     137
     138\subsection{Asynchronous Extension}
     139
     140\subsection{Interface directly to \lstinline|io_uring|}
  • doc/theses/thierry_delisle_PhD/thesis/text/runtime.tex

    rda3963a r565acf59  
    1111
    1212\section{Clusters}
    13 \CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} are only be scheduled onto \glspl{proc} in the same cluster and scheduling is done independently of other clusters. Figure~\ref{fig:system} shows an overview of the \CFA runtime, which allows programmers to tightly control parallelism. It also opens the door to handling effects like NUMA, by pining clusters to a specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}.
     13\CFA allows the option to group user-level threading, in the form of clusters. Both \glspl{thrd} and \glspl{proc} belong to a specific cluster. \Glspl{thrd} are only scheduled onto \glspl{proc} in the same cluster and scheduling is done independently of other clusters. Figure~\ref{fig:system} shows an overview of the \CFA runtime, which allows programmers to tightly control parallelism. It also opens the door to handling effects like NUMA, by pinning clusters to a specific NUMA node\footnote{This is not currently implemented in \CFA, but the only hurdle left is creating a generic interface for cpu masks.}.
    1414
    1515\begin{figure}
     
    2525
    2626\section{\glsxtrshort{io}}\label{prev:io}
    27 Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. %\CFA being built on C, this means that,
    28 While all I/O operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model~\cite{pthreads}. Using these 1:1 threading operations in an M:N threading model means I/O operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. It also means deadlock can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows:
     27Prior to this work, the \CFA runtime did not add any particular support for \glsxtrshort{io} operations. While all \glsxtrshort{io} operations available in C are available in \CFA, \glsxtrshort{io} operations are designed for the POSIX threading model~\cite{pthreads}. Using these 1:1 threading operations in an M:N threading model means \glsxtrshort{io} operations block \glspl{proc} instead of \glspl{thrd}. While this can work in certain cases, it limits the number of concurrent operations to the number of \glspl{proc} rather than \glspl{thrd}. It also means deadlock can occur because all \glspl{proc} are blocked even if at least one \gls{thrd} is ready to run. A simple example of this type of deadlock would be as follows:
     28
    2929\begin{quote}
    3030Given a simple network program with 2 \glspl{thrd} and a single \gls{proc}, one \gls{thrd} sends network requests to a server and the other \gls{thrd} waits for a response from the server. If the second \gls{thrd} races ahead, it may wait for responses to requests that have not been sent yet. In theory, this should not be a problem, even if the second \gls{thrd} waits, because the first \gls{thrd} is still ready to run and should be able to get CPU time to send the request. With M:N threading, while the first \gls{thrd} is ready, the lone \gls{proc} \emph{cannot} run the first \gls{thrd} if it is blocked in the \glsxtrshort{io} operation of the second \gls{thrd}. If this happens, the system is in a synchronization deadlock\footnote{In this example, the deadlock could be resolved if the server sends unprompted messages to the client. However, this solution is not general and may not be appropriate even in this simple case.}.
    3131\end{quote}
    32 Therefore, one of the objective of this work is to introduce \emph{User-Level \glsxtrshort{io}}, like \glslink{uthrding}{User-Level \emph{Threading}} blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations, which entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple I/O operations in parallel. This requirement cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its blocking duration. Executing I/O operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} does not block.
    3332
    34 \section{Interoperating with C}
     33Therefore, one of the objectives of this work is to introduce \emph{User-Level \glsxtrshort{io}}, like \glslink{uthrding}{User-Level \emph{Threading}} blocks \glspl{thrd} rather than \glspl{proc} when doing \glsxtrshort{io} operations, which entails multiplexing the \glsxtrshort{io} operations of many \glspl{thrd} onto fewer \glspl{proc}. This multiplexing requires that a single \gls{proc} be able to execute multiple \glsxtrshort{io} operations in parallel. This requirement cannot be done with operations that block \glspl{proc}, \ie \glspl{kthrd}, since the first operation would prevent starting new operations for its blocking duration. Executing \glsxtrshort{io} operations in parallel requires \emph{asynchronous} \glsxtrshort{io}, sometimes referred to as \emph{non-blocking}, since the \gls{kthrd} does not block.
     34
     35\section{Interoperating with \texttt{C}}
    3536While \glsxtrshort{io} operations are the classical example of operations that block \glspl{kthrd}, the non-blocking challenge extends to all blocking system-calls. The POSIX standard states~\cite[\S~2.9.1]{POSIX17}:
    3637\begin{quote}
     
    4445\begin{enumerate}
    4546        \item Precisely identifying blocking C calls is difficult.
    46         \item Introducing new code can have a significant impact on general performance.
     47        \item Introducing control points code can have a significant impact on general performance.
    4748\end{enumerate}
    48 Because of these consequences, this work does not attempt to ``sandbox'' calls to C. Therefore, it is possible for an unidentified library calls to block a \gls{kthrd} leading to deadlocks in \CFA's M:N threading model, which would not occur in a traditional 1:1 threading model. Currently, all M:N thread systems interacting with UNIX without sandboxing suffer from this problem but manage to work very well in the majority of applications. Therefore, a complete solution to this problem is outside the scope of this thesis.
     49Because of these consequences, this work does not attempt to ``sandbox'' calls to C. Therefore, it is possible calls from an unidentified library will block a \gls{kthrd} leading to deadlocks in \CFA's M:N threading model, which would not occur in a traditional 1:1 threading model. Currently, all M:N thread systems interacting with UNIX without sandboxing suffer from this problem but manage to work very well in the majority of applications. Therefore, a complete solution to this problem is outside the scope of this thesis.
  • doc/theses/thierry_delisle_PhD/thesis/thesis.tex

    rda3963a r565acf59  
    8181%\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
    8282\usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
     83\usepackage{xcolor}
    8384\usepackage{graphicx} % For including graphics
    8485
     
    120121% although it's supposed to be in both the TeX Live and MikTeX distributions. There are also documentation and
    121122% installation instructions there.
    122 \renewcommand*{\glstextformat}[1]{\textsf{#1}}
     123\makeatletter
     124\newcommand*{\glsplainhyperlink}[2]{%
     125  \colorlet{currenttext}{.}% store current text color
     126  \colorlet{currentlink}{\@linkcolor}% store current link color
     127  \hypersetup{linkcolor=currenttext}% set link color
     128  \hyperlink{#1}{#2}%
     129  \hypersetup{linkcolor=currentlink}% reset to default
     130}
     131\let\@glslink\glsplainhyperlink
     132\makeatother
    123133
    124134\usepackage{csquotes}
     
    200210\makeindex
    201211
     212\newcommand\io{\glsxtrshort{io}}%
     213
    202214%======================================================================
    203215%   L O G I C A L    D O C U M E N T -- the content of your thesis
     
    232244\part{Design}
    233245\input{text/core.tex}
     246\input{text/io.tex}
    234247\input{text/practice.tex}
    235 \input{text/io.tex}
    236248\part{Evaluation}
    237249\label{Evaluation}
  • doc/user/figures/Cdecl.fig

    rda3963a r565acf59  
    19192 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    2020         2850 1200 3600 1200 3600 1350 2850 1350 2850 1200
    21 4 1 0 50 -1 4 10 0.0000 2 120 90 2925 1325 0\001
    22 4 1 0 50 -1 4 10 0.0000 2 120 90 3075 1325 1\001
    23 4 1 0 50 -1 4 10 0.0000 2 120 90 3225 1325 2\001
    24 4 1 0 50 -1 4 10 0.0000 2 120 90 3375 1325 3\001
    25 4 1 0 50 -1 4 10 0.0000 2 120 90 3525 1325 4\001
     214 1 0 50 -1 4 11 0.0000 2 120 90 2925 1325 0\001
     224 1 0 50 -1 4 11 0.0000 2 120 90 3075 1325 1\001
     234 1 0 50 -1 4 11 0.0000 2 120 90 3225 1325 2\001
     244 1 0 50 -1 4 11 0.0000 2 120 90 3375 1325 3\001
     254 1 0 50 -1 4 11 0.0000 2 120 90 3525 1325 4\001
    2626-6
    27272 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
     
    5555        1 1 1.00 45.00 60.00
    5656         2550 1275 2850 1275
    57 4 1 0 50 -1 4 10 0.0000 2 120 90 1350 1650 0\001
    58 4 1 0 50 -1 4 10 0.0000 2 120 90 1500 1650 1\001
    59 4 1 0 50 -1 4 10 0.0000 2 120 90 1650 1650 2\001
    60 4 1 0 50 -1 4 10 0.0000 2 120 90 1800 1650 3\001
    61 4 1 0 50 -1 4 10 0.0000 2 120 90 1950 1650 4\001
    62 4 1 0 50 -1 4 10 0.0000 2 90 90 1200 1325 x\001
    63 4 1 0 50 -1 4 10 0.0000 2 90 90 2400 1325 x\001
     574 1 0 50 -1 4 11 0.0000 2 120 90 1350 1650 0\001
     584 1 0 50 -1 4 11 0.0000 2 120 90 1500 1650 1\001
     594 1 0 50 -1 4 11 0.0000 2 120 90 1650 1650 2\001
     604 1 0 50 -1 4 11 0.0000 2 120 90 1800 1650 3\001
     614 1 0 50 -1 4 11 0.0000 2 120 90 1950 1650 4\001
     624 1 0 50 -1 4 11 0.0000 2 90 90 1200 1325 x\001
     634 1 0 50 -1 4 11 0.0000 2 90 90 2400 1325 x\001
  • doc/user/user.tex

    rda3963a r565acf59  
    1111%% Created On       : Wed Apr  6 14:53:29 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Mon Oct  5 08:57:29 2020
    14 %% Update Count     : 3998
     13%% Last Modified On : Mon Feb  8 21:53:31 2021
     14%% Update Count     : 4327
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1616
     
    3737\usepackage{mathptmx}                                   % better math font with "times"
    3838\usepackage[usenames]{color}
     39\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
     40\usepackage{breakurl}
     41
     42\renewcommand\footnoterule{\kern -3pt\rule{0.3\linewidth}{0.15pt}\kern 2pt}
     43
     44\usepackage[pagewise]{lineno}
     45\renewcommand{\linenumberfont}{\scriptsize\sffamily}
     46\usepackage[firstpage]{draftwatermark}
     47\SetWatermarkLightness{0.9}
     48
     49% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
     50% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
     51% AFTER HYPERREF.
     52\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
     53
     54\setlength{\topmargin}{-0.45in}                                                 % move running title into header
     55\setlength{\headsep}{0.25in}
     56
     57%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     58
    3959\newcommand{\CFALatin}{}
    4060% inline code ©...© (copyright symbol) emacs: C-q M-)
     
    4666% math escape $...$ (dollar symbol)
    4767\input{common}                                          % common CFA document macros
    48 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
    49 \usepackage{breakurl}
    50 
    51 \renewcommand\footnoterule{\kern -3pt\rule{0.3\linewidth}{0.15pt}\kern 2pt}
    52 
    53 \usepackage[pagewise]{lineno}
    54 \renewcommand{\linenumberfont}{\scriptsize\sffamily}
    55 \usepackage[firstpage]{draftwatermark}
    56 \SetWatermarkLightness{0.9}
    57 
    58 % Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
    59 % removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
    60 % AFTER HYPERREF.
    61 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
    62 
    63 \setlength{\topmargin}{-0.45in}                                                 % move running title into header
    64 \setlength{\headsep}{0.25in}
    65 
    66 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    67 
    6868\CFAStyle                                                                                               % use default CFA format-style
     69\lstset{language=CFA}                                                                   % CFA default language
    6970\lstnewenvironment{C++}[1][]                            % use C++ style
    70 {\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}
     71{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{@}{@},#1}}
    7172{}
    7273
     
    8182\newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}}
    8283\newcommand{\R}[1]{\Textbf{#1}}
     84\newcommand{\RC}[1]{\Textbf{\LstBasicStyle{#1}}}
    8385\newcommand{\B}[1]{{\Textbf[blue]{#1}}}
    8486\newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
     
    104106\author{
    105107\huge \CFA Team \medskip \\
    106 \Large Andrew Beach, Richard Bilson, Peter A. Buhr, Thierry Delisle, \smallskip \\
    107 \Large Glen Ditchfield, Rodolfo G. Esteves, Aaron Moss, Rob Schluntz
     108\Large Andrew Beach, Richard Bilson, Michael Brooks, Peter A. Buhr, Thierry Delisle, \smallskip \\
     109\Large Glen Ditchfield, Rodolfo G. Esteves, Aaron Moss, Colby Parsons, Rob Schluntz, \smallskip \\
     110\Large Fangren Yu, Mubeen Zulfiqar
    108111}% author
    109112
     
    144147\section{Introduction}
    145148
    146 \CFA{}\index{cforall@\CFA}\footnote{Pronounced ``\Index*{C-for-all}'', and written \CFA, CFA, or \CFL.} is a modern general-purpose programming-language, designed as an evolutionary step forward for the C programming language.
     149\CFA{}\index{cforall@\CFA}\footnote{Pronounced ``\Index*{C-for-all}'', and written \CFA, CFA, or \CFL.} is a modern general-purpose concurrent programming-language, designed as an evolutionary step forward for the C programming language.
    147150The syntax of \CFA builds from C and should look immediately familiar to C/\Index*[C++]{\CC{}} programmers.
    148151% Any language feature that is not described here can be assumed to be using the standard \Celeven syntax.
    149 \CFA adds many modern programming-language features that directly lead to increased \emph{\Index{safety}} and \emph{\Index{productivity}}, while maintaining interoperability with existing C programs and achieving similar performance.
     152\CFA adds many modern features that directly lead to increased \emph{\Index{safety}} and \emph{\Index{productivity}}, while maintaining interoperability with existing C programs and achieving similar performance.
    150153Like C, \CFA is a statically typed, procedural (non-\Index{object-oriented}) language with a low-overhead runtime, meaning there is no global \Index{garbage-collection}, but \Index{regional garbage-collection}\index{garbage-collection!regional} is possible.
    151154The primary new features include polymorphic routines and types, exceptions, concurrency, and modules.
     
    157160instead, a programmer evolves a legacy program into \CFA by incrementally incorporating \CFA features.
    158161As well, new programs can be written in \CFA using a combination of C and \CFA features.
     162In many ways, \CFA is to C as \Index{Scala}~\cite{Scala} is to Java, providing a vehicle for new typing and control-flow capabilities on top of a highly popular programming language allowing immediate dissemination.
    159163
    160164\Index*[C++]{\CC{}}~\cite{c++:v1} had a similar goal 30 years ago, allowing object-oriented programming to be incrementally added to C.
     
    165169For example, the following programs compare the C, \CFA, and \CC I/O mechanisms, where the programs output the same result.
    166170\begin{center}
    167 \begin{tabular}{@{}l@{\hspace{1.5em}}l@{\hspace{1.5em}}l@{}}
    168 \multicolumn{1}{c@{\hspace{1.5em}}}{\textbf{C}} & \multicolumn{1}{c}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{\CC}}      \\
    169 \begin{cfa}
    170 #include <stdio.h>§\indexc{stdio.h}§
     171\begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
     172\multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}}   & \multicolumn{1}{c}{\textbf{\CFA}}     & \multicolumn{1}{c}{\textbf{\CC}}      \\
     173\begin{cfa}
     174#include <stdio.h>$\indexc{stdio.h}$
    171175
    172176int main( void ) {
    173177        int x = 0, y = 1, z = 2;
    174         ®printf( "%d %d %d\n", x, y, z );®
     178        @printf( "%d %d %d\n", x, y, z );@
    175179}
    176180\end{cfa}
    177181&
    178182\begin{cfa}
    179 #include <fstream>§\indexc{fstream}§
     183#include <fstream>$\indexc{fstream}$
    180184
    181185int main( void ) {
    182186        int x = 0, y = 1, z = 2;
    183         ®sout | x | y | z;®§\indexc{sout}§
     187        @sout | x | y | z;@$\indexc{sout}$
    184188}
    185189\end{cfa}
    186190&
    187191\begin{cfa}
    188 #include <iostream>§\indexc{iostream}§
     192#include <iostream>$\indexc{iostream}$
    189193using namespace std;
    190194int main() {
    191195        int x = 0, y = 1, z = 2;
    192         ®cout<<x<<" "<<y<<" "<<z<<endl;®
     196        @cout<<x<<" "<<y<<" "<<z<<endl;@
    193197}
    194198\end{cfa}
    195199\end{tabular}
    196200\end{center}
    197 While the \CFA I/O looks similar to the \Index*[C++]{\CC{}} output style, there are important differences, such as automatic spacing between variables as in \Index*{Python} (see~\VRef{s:IOLibrary}).
     201While \CFA I/O \see{\VRef{s:StreamIOLibrary}} looks similar to \Index*[C++]{\CC{}}, there are important differences, such as automatic spacing between variables and an implicit newline at the end of the expression list, similar to \Index*{Python}~\cite{Python}.
    198202
    199203
     
    210214\section{Why fix C?}
    211215
    212 The C programming language is a foundational technology for modern computing with millions of lines of code implementing everything from hobby projects to commercial operating-systems.
     216The C programming language is a foundational technology for modern computing with billions of lines of code implementing everything from hobby projects to commercial operating-systems.
    213217This installation base and the programmers producing it represent a massive software-engineering investment spanning decades and likely to continue for decades more.
    214218Even with all its problems, C continues to be popular because it allows writing software at virtually any level in a computer system without restriction.
    215 For system programming, where direct access to hardware, storage management, and real-time issues are a requirement, C is usually the only language of choice.
    216 The TIOBE index~\cite{TIOBE} for February 2020 ranks the top six most \emph{popular} programming languages as \Index*{Java} 17.4\%, C 16.8\%, Python 9.3\%, \Index*[C++]{\CC{}} 6.2\%, \Csharp 5.9\%, Visual Basic 5.9\% = 61.5\%, where the next 50 languages are less than 2\% each, with a long tail.
     219For system programming, where direct access to hardware, storage management, and real-time issues are a requirement, C is the only language of choice.
     220The TIOBE index~\cite{TIOBE} for February 2021 ranks the top six most \emph{popular} programming languages as C 17.4\%, \Index*{Java} 12\%, Python 12\%, \Index*[C++]{\CC{}} 7.6\%, \Csharp 4\%, Visual Basic 3.8\% = 56.8\%, where the next 50 languages are less than 2\% each, with a long tail.
    217221The top 4 rankings over the past 35 years are:
    218222\begin{center}
    219223\setlength{\tabcolsep}{10pt}
    220224\begin{tabular}{@{}rcccccccc@{}}
    221                 & 2020  & 2015  & 2010  & 2005  & 2000  & 1995  & 1990  & 1985  \\ \hline
    222 Java    & 1             & 2             & 1             & 2             & 3             & -             & -             & -             \\
    223 \R{C}   & \R{2} & \R{1} & \R{2} & \R{1} & \R{1} & \R{2} & \R{1} & \R{1} \\
    224 Python  & 3             & 7             & 6             & 6             & 22    & 21    & -             & -             \\
    225 \CC             & 4             & 4             & 4             & 3             & 2             & 1             & 2             & 12    \\
     225                & 2021  & 2016  & 2011  & 2006  & 2001  & 1996  & 1991  & 1986  \\ \hline
     226\R{C}   & \R{1} & \R{2} & \R{2} & \R{1} & \R{1} & \R{1} & \R{1} & \R{1} \\
     227Java    & 2             & 1             & 1             & 2             & 3             & 28    & -             & -             \\
     228Python  & 3             & 5             & 6             & 7             & 23    & 13    & -             & -             \\
     229\CC             & 4             & 3             & 3             & 3             & 2             & 2             & 2             & 8             \\
    226230\end{tabular}
    227231\end{center}
     
    232236As stated, the goal of the \CFA project is to engineer modern language-features into C in an evolutionary rather than revolutionary way.
    233237\CC~\cite{C++14,C++} is an example of a similar project;
    234 however, it largely extended the C language, and did not address most of C's existing problems.\footnote{%
     238however, it largely extended the C language, and did not address many of C's existing problems.\footnote{%
    235239Two important existing problems addressed were changing the type of character literals from ©int© to ©char© and enumerator from ©int© to the type of its enumerators.}
    236240\Index*{Fortran}~\cite{Fortran08}, \Index*{Ada}~\cite{Ada12}, and \Index*{Cobol}~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language-features (\eg objects, concurrency) are added and problems fixed within the framework of the existing language.
     
    241245
    242246The result of this project is a language that is largely backwards compatible with \Index*[C11]{\Celeven{}}~\cite{C11}, but fixes many of the well known C problems while adding modern language-features.
    243 To achieve these goals required a significant engineering exercise, where we had to ``think inside the existing C box''.
    244 Without these significant extension to C, it is unable to cope with the needs of modern programming problems and programmers;
    245 as a result, it will fade into disuse.
    246 Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern programming language.
     247To achieve these goals required a significant engineering exercise, \ie ``thinking \emph{inside} the C box''.
     248Considering the large body of existing C code and programmers, there is significant impetus to ensure C is transformed into a modern language.
    247249While \Index*[C11]{\Celeven{}} made a few simple extensions to the language, nothing was added to address existing problems in the language or to augment the language with modern language-features.
    248250While some may argue that modern language-features may make C complex and inefficient, it is clear a language without modern capabilities is insufficient for the advanced programming problems existing today.
     
    251253\section{History}
    252254
    253 The \CFA project started with \Index*{Dave Till}\index{Till, Dave}'s \Index*{K-W C}~\cite{Buhr94a,Till89}, which extended C with new declaration syntax, multiple return values from routines, and advanced assignment capabilities using the notion of tuples.
    254 (See~\cite{Werther96} for similar work in \Index*[C++]{\CC{}}.)
     255The \CFA project started with \Index*{Dave Till}\index{Till, Dave}'s \Index*{K-W C}~\cite{Buhr94a,Till89}, which extended C with new declaration syntax, multiple return values from routines, and advanced assignment capabilities using the notion of tuples \see{\cite{Werther96} for similar work in \Index*[C++]{\CC{}}}.
    255256The first \CFA implementation of these extensions was by \Index*{Rodolfo Esteves}\index{Esteves, Rodolfo}~\cite{Esteves04}.
    256257
    257258The signature feature of \CFA is \emph{\Index{overload}able} \Index{parametric-polymorphic} functions~\cite{forceone:impl,Cormack90,Duggan96} with functions generalized using a ©forall© clause (giving the language its name):
    258259\begin{cfa}
    259 ®forall( otype T )® T identity( T val ) { return val; }
    260 int forty_two = identity( 42 ); §\C{// T is bound to int, forty\_two == 42}§
     260@forall( otype T )@ T identity( T val ) { return val; }
     261int forty_two = identity( 42 ); $\C{// T is bound to int, forty\_two == 42}$
    261262\end{cfa}
    262263% extending the C type system with parametric polymorphism and overloading, as opposed to the \Index*[C++]{\CC{}} approach of object-oriented extensions.
    263264\CFA{}\hspace{1pt}'s polymorphism was originally formalized by \Index*{Glen Ditchfield}\index{Ditchfield, Glen}~\cite{Ditchfield92}, and first implemented by \Index*{Richard Bilson}\index{Bilson, Richard}~\cite{Bilson03}.
    264265However, at that time, there was little interest in extending C, so work did not continue.
    265 As the saying goes, ``\Index*{What goes around, comes around.}'', and there is now renewed interest in the C programming language because of legacy code-bases, so the \CFA project has been restarted.
     266As the saying goes, ``\Index*{What goes around, comes around.}'', and there is now renewed interest in the C programming language because of the legacy code-base, so the \CFA project was restarted in 2015.
    266267
    267268
     
    273274This feature allows \CFA programmers to take advantage of the existing panoply of C libraries to access thousands of external software features.
    274275Language developers often state that adequate \Index{library support} takes more work than designing and implementing the language itself.
    275 Fortunately, \CFA, like \Index*[C++]{\CC{}}, starts with immediate access to all exiting C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at very low cost.
     276Fortunately, \CFA, like \Index*[C++]{\CC{}}, starts with immediate access to all existing C libraries, and in many cases, can easily wrap library routines with simpler and safer interfaces, at zero or very low cost.
    276277Hence, \CFA begins by leveraging the large repository of C libraries, and then allows programmers to incrementally augment their C programs with modern \Index{backward-compatible} features.
    277278
     
    286287
    287288double key = 5.0, vals[10] = { /* 10 sorted floating values */ };
    288 double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp ); §\C{// search sorted array}§
     289double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp ); $\C{// search sorted array}$
    289290\end{cfa}
    290291which can be augmented simply with polymorphic, type-safe, \CFA-overloaded wrappers:
     
    295296
    296297forall( otype T | { int ?<?( T, T ); } ) unsigned int bsearch( T key, const T * arr, size_t size ) {
    297         T * result = bsearch( key, arr, size ); §\C{// call first version}§
    298         return result ? result - arr : size; } §\C{// pointer subtraction includes sizeof(T)}§
    299 
    300 double * val = bsearch( 5.0, vals, 10 ); §\C{// selection based on return type}§
     298        T * result = bsearch( key, arr, size ); $\C{// call first version}$
     299        return result ? result - arr : size; } $\C{// pointer subtraction includes sizeof(T)}$
     300
     301double * val = bsearch( 5.0, vals, 10 ); $\C{// selection based on return type}$
    301302int posn = bsearch( 5.0, vals, 10 );
    302303\end{cfa}
     
    310311\begin{cfa}
    311312forall( dtype T | sized(T) ) T * malloc( void ) { return (T *)malloc( sizeof(T) ); }
    312 int * ip = malloc(); §\C{// select type and size from left-hand side}§
     313int * ip = malloc(); $\C{// select type and size from left-hand side}$
    313314double * dp = malloc();
    314315struct S {...} * sp = malloc();
     
    319320However, it is necessary to differentiate between C and \CFA code because of name \Index{overload}ing, as for \CC.
    320321For example, the C math-library provides the following routines for computing the absolute value of the basic types: ©abs©, ©labs©, ©llabs©, ©fabs©, ©fabsf©, ©fabsl©, ©cabsf©, ©cabs©, and ©cabsl©.
    321 Whereas, \CFA wraps each of these routines into ones with the overloaded name ©abs©:
    322 \begin{cfa}
    323 char ®abs®( char );
    324 extern "C" { int ®abs®( int ); } §\C{// use default C routine for int}§
    325 long int ®abs®( long int );
    326 long long int ®abs®( long long int );
    327 float ®abs®( float );
    328 double ®abs®( double );
    329 long double ®abs®( long double );
    330 float _Complex ®abs®( float _Complex );
    331 double _Complex ®abs®( double _Complex );
    332 long double _Complex ®abs®( long double _Complex );
    333 \end{cfa}
    334 The problem is the name clash between the library routine ©abs© and the \CFA names ©abs©.
    335 Hence, names appearing in an ©extern "C"© block have \newterm*{C linkage}.
    336 Then overloading polymorphism uses a mechanism called \newterm{name mangling}\index{mangling!name} to create unique names that are different from C names, which are not mangled.
    337 Hence, there is the same need, as in \CC, to know if a name is a C or \CFA name, so it can be correctly formed.
    338 There is no way around this problem, other than C's approach of creating unique names for each pairing of operation and types.
    339 
    340 This example strongly illustrates a core idea in \CFA: \emph{the \Index{power of a name}}.
     322Whereas, \CFA wraps each of these routines into one overloaded name ©abs©:
     323\begin{cfa}
     324char @abs@( char );
     325extern "C" { int @abs@( int ); } $\C{// use default C routine for int}$
     326long int @abs@( long int );
     327long long int @abs@( long long int );
     328float @abs@( float );
     329double @abs@( double );
     330long double @abs@( long double );
     331float _Complex @abs@( float _Complex );
     332double _Complex @abs@( double _Complex );
     333long double _Complex @abs@( long double _Complex );
     334\end{cfa}
     335The problem is a \Index{name clash} between the C name ©abs© and the \CFA names ©abs©, resulting in two name linkages\index{C linkage}: ©extern "C"© and ©extern "Cforall"© (default).
     336Overloaded names must use \newterm{name mangling}\index{mangling!name} to create unique names that are different from unmangled C names.
     337Hence, there is the same need as in \CC to know if a name is a C or \CFA name, so it can be correctly formed.
     338The only way around this problem is C's approach of creating unique names for each pairing of operation and type.
     339
     340This example illustrates a core idea in \CFA: \emph{the \Index{power of a name}}.
    341341The name ``©abs©'' evokes the notion of absolute value, and many mathematical types provide the notion of absolute value.
    342342Hence, knowing the name ©abs© is sufficient to apply it to any type where it is applicable.
     
    344344
    345345
    346 \section[Compiling a CFA Program]{Compiling a \CFA Program}
     346\section{\CFA Compilation}
    347347
    348348The command ©cfa© is used to compile a \CFA program and is based on the \Index{GNU} \Indexc{gcc} command, \eg:
    349349\begin{cfa}
    350 cfa§\indexc{cfa}\index{compilation!cfa@©cfa©}§ [ gcc-options ] [ C/§\CFA{}§ source-files ] [ assembler/loader files ]
    351 \end{cfa}
    352 \CFA programs having the following ©gcc© flags turned on:
    353 \begin{description}
     350cfa$\indexc{cfa}\index{compilation!cfa@©cfa©}$ [ gcc/$\CFA{}$-options ] [ C/$\CFA{}$ source-files ] [ assembler/loader files ]
     351\end{cfa}
     352There is no ordering among options (flags) and files, unless an option has an argument, which must appear immediately after the option, with or without a separating space.
     353
     354\CFA has the following ©gcc© flags turned on:
     355\begin{description}[topsep=0pt]
    354356\item
    355357\Indexc{-std=gnu11}\index{compilation option!-std=gnu11@{©-std=gnu11©}}
     
    359361Use the traditional GNU semantics for inline routines in C11 mode, which allows inline routines in header files.
    360362\end{description}
    361 The following new \CFA options are available:
    362 \begin{description}
     363
     364\CFA has the following new options:
     365\begin{description}[topsep=0pt]
    363366\item
    364367\Indexc{-CFA}\index{compilation option!-CFA@©-CFA©}
    365 Only the C preprocessor and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
     368Only the C preprocessor (flag ©-E©) and the \CFA translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \CFA translator.
    366369The generated code starts with the standard \CFA \Index{prelude}.
     370
     371\item
     372\Indexc{-XCFA}\index{compilation option!-XCFA@©-XCFA©}
     373Pass next flag as-is to the ©cfa-cpp© translator (see details below).
    367374
    368375\item
    369376\Indexc{-debug}\index{compilation option!-debug@©-debug©}
    370377The program is linked with the debugging version of the runtime system.
    371 The debug version performs runtime checks to help during the debugging phase of a \CFA program, but can substantially slow program execution.
     378The debug version performs runtime checks to aid the debugging phase of a \CFA program, but can substantially slow program execution.
    372379The runtime checks should only be removed after the program is completely debugged.
    373380\textbf{This option is the default.}
     
    399406\item
    400407\Indexc{-no-include-stdhdr}\index{compilation option!-no-include-stdhdr@©-no-include-stdhdr©}
    401 Do not supply ©extern "C"© wrappers for \Celeven standard include files (see~\VRef{s:StandardHeaders}).
     408Do not supply ©extern "C"© wrappers for \Celeven standard include files \see{\VRef{s:StandardHeaders}}.
    402409\textbf{This option is \emph{not} the default.}
    403410\end{comment}
     
    430437\begin{cfa}
    431438#ifndef __CFORALL__
    432 #include <stdio.h>§\indexc{stdio.h}§ §\C{// C header file}§
     439#include <stdio.h>$\indexc{stdio.h}$ $\C{// C header file}$
    433440#else
    434 #include <fstream>§\indexc{fstream}§ §\C{// \CFA header file}§
     441#include <fstream>$\indexc{fstream}$ $\C{// \CFA header file}$
    435442#endif
    436443\end{cfa}
     
    438445
    439446The \CFA translator has multiple steps.
    440 The following flags control how the tranlator works, the stages run, and printing within a stage.
     447The following flags control how the translator works, the stages run, and printing within a stage.
    441448The majority of these flags are used by \CFA developers, but some are occasionally useful to programmers.
     449Each option must be escaped with \Indexc{-XCFA}\index{translator option!-XCFA@{©-XCFA©}} to direct it to the compiler step, similar to the ©-Xlinker© flag for the linker, \eg:
     450\begin{lstlisting}[language=sh]
     451cfa $test$.cfa -CFA -XCFA -p # print translated code without printing the standard prelude
     452cfa $test$.cfa -XCFA -P -XCFA parse -XCFA -n # show program parse without prelude
     453\end{lstlisting}
    442454\begin{description}[topsep=5pt,itemsep=0pt,parsep=0pt]
    443455\item
    444 \Indexc{-h}\index{translator option!-h@{©-h©}}, \Indexc{--help}\index{translator option!--help@{©--help©}} \, print help message
    445 \item
    446 \Indexc{-l}\index{translator option!-l@{©-l©}}, \Indexc{--libcfa}\index{translator option!--libcfa@{©--libcfa©}} \, generate libcfa.c
     456\Indexc{-c}\index{translator option!-c@{©-c©}}, \Indexc{--colors}\index{translator option!--colors@{©--colors©}} \, diagnostic color: ©never©, ©always©, \lstinline[deletekeywords=auto]{auto}
     457\item
     458\Indexc{-g}\index{translator option!-g@{©-g©}}, \Indexc{--gdb}\index{translator option!--gdb@{©--gdb©}} \, wait for gdb to attach
     459\item
     460\Indexc{-h}\index{translator option!-h@{©-h©}}, \Indexc{--help}\index{translator option!--help@{©--help©}} \, print translator help message
     461\item
     462\Indexc{-l}\index{translator option!-l@{©-l©}}, \Indexc{--libcfa}\index{translator option!--libcfa@{©--libcfa©}} \, generate ©libcfa.c©
    447463\item
    448464\Indexc{-L}\index{translator option!-L@{©-L©}}, \Indexc{--linemarks}\index{translator option!--linemarks@{©--linemarks©}} \, generate line marks
     
    454470\Indexc{-n}\index{translator option!-n@{©-n©}}, \Indexc{--no-prelude}\index{translator option!--no-prelude@{©--no-prelude©}} \, do not read prelude
    455471\item
    456 \Indexc{-p}\index{translator option!-p@{©-p©}}, \Indexc{--prototypes}\index{translator option!--prototypes@{©--prototypes©}} \, generate prototypes for prelude functions
     472\Indexc{-p}\index{translator option!-p@{©-p©}}, \Indexc{--prototypes}\index{translator option!--prototypes@{©--prototypes©}} \, do not generate prelude prototypes $\Rightarrow$ prelude not printed
     473\item
     474\Indexc{-d}\index{translator option!-d@{©-d©}}, \Indexc{--deterministic-out}\index{translator option!--deterministic-out@{©--deterministic-out©}} \, only print deterministic output
    457475\item
    458476\Indexc{-P}\index{translator option!-P@{©-P©}}, \Indexc{--print}\index{translator option!--print@{©--print©}} \, one of:
    459477\begin{description}[topsep=0pt,itemsep=0pt,parsep=0pt]
    460478\item
     479\Indexc{ascodegen}\index{translator option!-P@{©-P©}!©ascodegen©}\index{translator option!--print@{©-print©}!©ascodegen©} \, as codegen rather than AST
     480\item
     481\Indexc{asterr}\index{translator option!-P@{©-P©}!©asterr©}\index{translator option!--print@{©-print©}!©asterr©} \, AST on error
     482\item
     483\Indexc{declstats}\index{translator option!-P@{©-P©}!©declstats©}\index{translator option!--print@{©-print©}!©declstats©} \, code property statistics
     484\item
     485\Indexc{parse}\index{translator option!-P@{©-P©}!©parse©}\index{translator option!--print@{©-print©}!©parse©} \, yacc (parsing) debug information
     486\item
     487\Indexc{pretty}\index{translator option!-P@{©-P©}!©pretty©}\index{translator option!--print@{©-print©}!©pretty©} \, prettyprint for ©ascodegen© flag
     488\item
     489\Indexc{rproto}\index{translator option!-P@{©-P©}!©rproto©}\index{translator option!--print@{©-print©}!©rproto©} \, resolver-proto instance
     490\item
     491\Indexc{rsteps}\index{translator option!-P@{©-P©}!©rsteps©}\index{translator option!--print@{©-print©}!©rsteps©} \, resolver steps
     492\item
     493\Indexc{tree}\index{translator option!-P@{©-P©}!©tree©}\index{translator option!--print@{©-print©}!©tree©} \, parse tree
     494\item
     495\Indexc{ast}\index{translator option!-P@{©-P©}!©ast©}\index{translator option!--print@{©-print©}!©ast©} \, AST after parsing
     496\item
     497\Indexc{symevt}\index{translator option!-P@{©-P©}!©symevt©}\index{translator option!--print@{©-print©}!©symevt©} \, symbol table events
     498\item
    461499\Indexc{altexpr}\index{translator option!-P@{©-P©}!©altexpr©}\index{translator option!--print@{©-print©}!©altexpr©} \, alternatives for expressions
    462500\item
    463 \Indexc{ascodegen}\index{translator option!-P@{©-P©}!©ascodegen©}\index{translator option!--print@{©-print©}!©ascodegen©} \, as codegen rather than AST
    464 \item
    465 \Indexc{ast}\index{translator option!-P@{©-P©}!©ast©}\index{translator option!--print@{©-print©}!©ast©} \, AST after parsing
    466 \item
    467501\Indexc{astdecl}\index{translator option!-P@{©-P©}!©astdecl©}\index{translator option!--print@{©-print©}!©astdecl©} \, AST after declaration validation pass
    468502\item
    469 \Indexc{asterr}\index{translator option!-P@{©-P©}!©asterr©}\index{translator option!--print@{©-print©}!©asterr©} \, AST on error
     503\Indexc{resolver}\index{translator option!-P@{©-P©}!©resolver©}\index{translator option!--print@{©-print©}!©resolver©} \, before resolver step
    470504\item
    471505\Indexc{astexpr}\index{translator option!-P@{©-P©}!©astexpr©}\index{translator option!--print@{©-print©}!©astexpr©} \, AST after expression analysis
    472506\item
     507\Indexc{ctordtor}\index{translator option!-P@{©-P©}!©ctordtor©}\index{translator option!--print@{©-print©}!©ctordtor©} \, after ctor/dtor are replaced
     508\item
     509\Indexc{tuple}\index{translator option!-P@{©-P©}!©tuple©}\index{translator option!--print@{©-print©}!©tuple©} \, after tuple expansion
     510\item
    473511\Indexc{astgen}\index{translator option!-P@{©-P©}!©astgen©}\index{translator option!--print@{©-print©}!©astgen©} \, AST after instantiate generics
    474512\item
    475513\Indexc{box}\index{translator option!-P@{©-P©}!©box©}\index{translator option!--print@{©-print©}!©box©} \, before box step
    476514\item
    477 \Indexc{ctordtor}\index{translator option!-P@{©-P©}!©ctordtor©}\index{translator option!--print@{©-print©}!©ctordtor©} \, after ctor/dtor are replaced
    478 \item
    479515\Indexc{codegen}\index{translator option!-P@{©-P©}!©codegen©}\index{translator option!--print@{©-print©}!©codegen©} \, before code generation
    480 \item
    481 \Indexc{declstats}\index{translator option!-P@{©-P©}!©declstats©}\index{translator option!--print@{©-print©}!©declstats©} \, code property statistics
    482 \item
    483 \Indexc{parse}\index{translator option!-P@{©-P©}!©parse©}\index{translator option!--print@{©-print©}!©parse©} \, yacc (parsing) debug information
    484 \item
    485 \Indexc{pretty}\index{translator option!-P@{©-P©}!©pretty©}\index{translator option!--print@{©-print©}!©pretty©} \, prettyprint for ascodegen flag
    486 \item
    487 \Indexc{resolver}\index{translator option!-P@{©-P©}!©resolver©}\index{translator option!--print@{©-print©}!©resolver©} \, before resolver step
    488 \item
    489 \Indexc{rproto}\index{translator option!-P@{©-P©}!©rproto©}\index{translator option!--print@{©-print©}!©rproto©} \, resolver-proto instance
    490 \item
    491 \Indexc{rsteps}\index{translator option!-P@{©-P©}!©rsteps©}\index{translator option!--print@{©-print©}!©rsteps©} \, resolver steps
    492 \item
    493 \Indexc{symevt}\index{translator option!-P@{©-P©}!©symevt©}\index{translator option!--print@{©-print©}!©symevt©} \, symbol table events
    494 \item
    495 \Indexc{tree}\index{translator option!-P@{©-P©}!©tree©}\index{translator option!--print@{©-print©}!©tree©} \, parse tree
    496 \item
    497 \Indexc{tuple}\index{translator option!-P@{©-P©}!©tuple©}\index{translator option!--print@{©-print©}!©tuple©} \, after tuple expansion
    498516\end{description}
    499517\item
    500518\Indexc{--prelude-dir} <directory> \, prelude directory for debug/nodebug
    501519\item
    502 \Indexc{-S}\index{translator option!-S@{©-S©}!©counters,heap,time,all,none©}, \Indexc{--statistics}\index{translator option!--statistics@{©--statistics©}!©counters,heap,time,all,none©} <option-list> \, enable profiling information:
    503 \begin{description}[topsep=0pt,itemsep=0pt,parsep=0pt]
    504 \item
    505 \Indexc{counters,heap,time,all,none}
    506 \end{description}
     520\Indexc{-S}\index{translator option!-S@{©-S©}!©counters,heap,time,all,none©}, \Indexc{--statistics}\index{translator option!--statistics@{©--statistics©}!©counters,heap,time,all,none©} <option-list> \, enable profiling information: ©counters©, ©heap©, ©time©, ©all©, ©none©
    507521\item
    508522\Indexc{-t}\index{translator option!-t@{©-t©}}, \Indexc{--tree}\index{translator option!--tree@{©--tree©}} build in tree
     
    513527\label{s:BackquoteIdentifiers}
    514528
    515 \CFA introduces several new keywords (see \VRef{s:CFAKeywords}) that can clash with existing C variable-names in legacy code.
     529\CFA introduces several new keywords \see{\VRef{s:CFAKeywords}} that can clash with existing C variable-names in legacy code.
    516530Keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism:
    517531\begin{cfa}
    518 int ®``®otype = 3; §\C{// make keyword an identifier}§
    519 double ®``®forall = 3.5;
     532int @``@otype = 3; $\C{// make keyword an identifier}$
     533double @``@forall = 3.5;
    520534\end{cfa}
    521535
    522536Existing C programs with keyword clashes can be converted by enclosing keyword identifiers in backquotes, and eventually the identifier name can be changed to a non-keyword name.
    523 \VRef[Figure]{f:HeaderFileInterposition} shows how clashes in existing C header-files (see~\VRef{s:StandardHeaders}) can be handled using preprocessor \newterm{interposition}: ©#include_next© and ©-I filename©.
     537\VRef[Figure]{f:HeaderFileInterposition} shows how clashes in existing C header-files \see{\VRef{s:StandardHeaders}} can be handled using preprocessor \newterm{interposition}: ©#include_next© and ©-I filename©.
    524538Several common C header-files with keyword clashes are fixed in the standard \CFA header-library, so there is a seamless programming-experience.
    525539
     
    527541\begin{cfa}
    528542// include file uses the CFA keyword "with".
    529 #if ! defined( with ) §\C{// nesting ?}§
    530 #define with ®``®with §\C{// make keyword an identifier}§
     543#if ! defined( with )                                                   $\C{// nesting ?}$
     544#define with @``@with                                                   $\C{// make keyword an identifier}$
    531545#define __CFA_BFD_H__
    532546#endif
    533 §{\color{red}\#\textbf{include\_next} <bfdlink.h>}§ §\C{// must have internal check for multiple expansion}§
    534 #if defined( with ) && defined( __CFA_BFD_H__ ) §\C{// reset only if set}§
     547$\R{\#include\_next} <bfdlink.h>$                               $\C{// must have internal check for multiple expansion}$
     548#if defined( with ) && defined( __CFA_BFD_H__ ) $\C{// reset only if set}$
    535549#undef with
    536550#undef __CFA_BFD_H__
     
    544558\section{Constant Underscores}
    545559
    546 Numeric constants are extended to allow \Index{underscore}s\index{constant!underscore}, \eg:
    547 \begin{cfa}
    548 2®_®147®_®483®_®648; §\C{// decimal constant}§
    549 56®_®ul; §\C{// decimal unsigned long constant}§
    550 0®_®377; §\C{// octal constant}§
    551 0x®_®ff®_®ff; §\C{// hexadecimal constant}§
    552 0x®_®ef3d®_®aa5c; §\C{// hexadecimal constant}§
    553 3.141®_®592®_®654; §\C{// floating constant}§
    554 10®_®e®_®+1®_®00; §\C{// floating constant}§
    555 0x®_®ff®_®ff®_®p®_®3; §\C{// hexadecimal floating}§
    556 0x®_®1.ffff®_®ffff®_®p®_®128®_®l; §\C{// hexadecimal floating long constant}§
    557 L®_®§"\texttt{\textbackslash{x}}§®_®§\texttt{ff}§®_®§\texttt{ee}"§; §\C{// wide character constant}§
     560Numeric constants are extended to allow \Index{underscore}s\index{constant!underscore} as a separator, \eg:
     561\begin{cfa}
     5622@_@147@_@483@_@648; $\C{// decimal constant}$
     56356@_@ul; $\C{// decimal unsigned long constant}$
     5640@_@377; $\C{// octal constant}$
     5650x@_@ff@_@ff; $\C{// hexadecimal constant}$
     5660x@_@ef3d@_@aa5c; $\C{// hexadecimal constant}$
     5673.141@_@592@_@654; $\C{// floating constant}$
     56810@_@e@_@+1@_@00; $\C{// floating constant}$
     5690x@_@ff@_@ff@_@p@_@3; $\C{// hexadecimal floating}$
     5700x@_@1.ffff@_@ffff@_@p@_@128@_@l; $\C{// hexadecimal floating long constant}$
     571L@_@$"\texttt{\textbackslash{x}}$@_@$\texttt{ff}$@_@$\texttt{ee}"$; $\C{// wide character constant}$
    558572\end{cfa}
    559573The rules for placement of underscores are:
     
    574588It is significantly easier to read and enter long constants when they are broken up into smaller groupings (many cultures use comma and/or period among digits for the same purpose).
    575589This extension is backwards compatible, matches with the use of underscore in variable names, and appears in \Index*{Ada} and \Index*{Java} 8.
     590\CC uses the single quote (©'©) as a separator, restricted within a sequence of digits, \eg ©0xaa©©'©©ff©, ©3.141©©'©©592E1©©'©©1©.
    576591
    577592
    578593\section{Exponentiation Operator}
    579594
    580 C, \CC, and Java (and many other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow(x,y)}, to perform the exponentiation operation.
    581 \CFA extends the basic operators with the exponentiation operator ©?®\®?©\index{?\\?@©?®\®?©} and ©?\=?©\index{?\\=?@©®\®=?©}, as in, ©x ®\® y© and ©x ®\®= y©, which means $x^y$ and $x \leftarrow x^y$.
    582 The priority of the exponentiation operator is between the cast and multiplicative operators, so that ©w * (int)x \ (int)y * z© is parenthesized as ©((w * (((int)x) \ ((int)y))) * z)©.
     595C, \CC, and Java (and other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow(x,y)}, to perform the exponentiation operation.
     596\CFA extends the basic operators with the exponentiation operator ©?©\R{©\\©}©?©\index{?\\?@©?@\@?©} and ©?©\R{©\\©}©=?©\index{?\\=?@©@\@=?©}, as in, ©x ©\R{©\\©}© y© and ©x ©\R{©\\©}©= y©, which means $x^y$ and $x \leftarrow x^y$.
     597The priority of the exponentiation operator is between the cast and multiplicative operators, so that ©w * (int)x \ (int)y * z© is parenthesized as ©(w * (((int)x) \ ((int)y))) * z©.
    583598
    584599There are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
     
    587602Floating exponentiation\index{exponentiation!floating} is performed using \Index{logarithm}s\index{exponentiation!logarithm}, so the exponent cannot be negative.
    588603\begin{cfa}
    589 sout | 1 ®\® 0 | 1 ®\® 1 | 2 ®\® 8 | -4 ®\® 3 | 5 ®\® 3 | 5 ®\® 32 | 5L ®\® 32 | 5L ®\® 64 | -4 ®\® -3 | -4.0 ®\® -3 | 4.0 ®\® 2.1
    590            | (1.0f+2.0fi) ®\® (3.0f+2.0fi);
    591 1 1 256 -64 125 ®0® 3273344365508751233 ®0® ®0® -0.015625 18.3791736799526 0.264715-1.1922i
     604sout | 1 @\@ 0 | 1 @\@ 1 | 2 @\@ 8 | -4 @\@ 3 | 5 @\@ 3 | 5 @\@ 32 | 5L @\@ 32 | 5L @\@ 64 | -4 @\@ -3 | -4.0 @\@ -3 | 4.0 @\@ 2.1
     605           | (1.0f+2.0fi) @\@ (3.0f+2.0fi);
     6061 1 256 -64 125 @0@ 3273344365508751233 @0@ @0@ -0.015625 18.3791736799526 0.264715-1.1922i
    592607\end{cfa}
    593608Note, ©5 \ 32© and ©5L \ 64© overflow, and ©-4 \ -3© is a fraction but stored in an integer so all three computations generate an integral zero.
    594 Parenthesis are necessary for complex constants or the expression is parsed as ©1.0f+®(®2.0fi \ 3.0f®)®+2.0fi©.
     609Because exponentiation has higher priority than ©+©, parentheses are necessary for exponentiation of \Index{complex constant}s or the expression is parsed as ©1.0f+©\R{©(©}©2.0fi \ 3.0f©\R{©)©}©+2.0fi©, requiring \R{©(©}©1.0f+2.0fi©\R{©)©}© \ ©\R{©(©}©3.0f+2.0fi©\R{©)©}.
     610
    595611The exponentiation operator is available for all the basic types, but for user-defined types, only the integral-computation version is available.
    596612\begin{cfa}
    597 forall( otype OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } )
    598 OT ?®\®?( OT ep, unsigned int y );
    599 forall( otype OT | { void ?{}( OT & this, one_t ); OT ?*?( OT, OT ); } )
    600 OT ?®\®?( OT ep, unsigned long int y );
     613forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); } )
     614T ?@\@?( T ep, unsigned int y );
     615forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); } )
     616T ?@\@?( T ep, unsigned long int y );
    601617\end{cfa}
    602618The user type ©T© must define one (©1©) and multiplication (©*©).
     
    609625
    610626%\subsection{\texorpdfstring{\protect\lstinline@if@/\protect\lstinline@while@ Statement}{if Statement}}
    611 \subsection{\texorpdfstring{\LstKeywordStyle{if}/\LstKeywordStyle{while} Statement}{if/while Statement}}
    612 
    613 The ©if©/©while© expression allows declarations, similar to ©for© declaration expression.
    614 (Does not make sense for ©do©-©while©.)
    615 \begin{cfa}
    616 if ( ®int x = f()® ) ... §\C{// x != 0}§
    617 if ( ®int x = f(), y = g()® ) ... §\C{// x != 0 \&\& y != 0}§
    618 if ( ®int x = f(), y = g(); x < y® ) ... §\C{// relational expression}§
    619 if ( ®struct S { int i; } x = { f() }; x.i < 4® ) §\C{// relational expression}§
    620 
    621 while ( ®int x = f()® ) ... §\C{// x != 0}§
    622 while ( ®int x = f(), y = g()® ) ... §\C{// x != 0 \&\& y != 0}§
    623 while ( ®int x = f(), y = g(); x < y® ) ... §\C{// relational expression}§
    624 while ( ®struct S { int i; } x = { f() }; x.i < 4® ) ... §\C{// relational expression}§
    625 \end{cfa}
    626 Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical ©&&© operator.\footnote{\CC only provides a single declaration always compared not equal to 0.}
    627 The scope of the declaration(s) is local to the @if@ statement but exist within both the ``then'' and ``else'' clauses.
     627\subsection{\texorpdfstring{\LstKeywordStyle{if} / \LstKeywordStyle{while} Statement}{if / while Statement}}
     628
     629The ©if©/©while© expression allows declarations, similar to the ©for© declaration expression.\footnote{
     630Declarations in the ©do©-©while© condition are not useful because they appear after the loop body.}
     631\begin{cfa}
     632if ( @int x = f()@ ) ... $\C{// x != 0}$
     633if ( @int x = f(), y = g()@ ) ... $\C{// x != 0 \&\& y != 0}$
     634if ( @int x = f(), y = g(); x < y@ ) ... $\C{// relational expression}$
     635if ( @struct S { int i; } x = { f() }; x.i < 4@ ) $\C{// relational expression}$
     636
     637while ( @int x = f()@ ) ... $\C{// x != 0}$
     638while ( @int x = f(), y = g()@ ) ... $\C{// x != 0 \&\& y != 0}$
     639while ( @int x = f(), y = g(); x < y@ ) ... $\C{// relational expression}$
     640while ( @struct S { int i; } x = { f() }; x.i < 4@ ) ... $\C{// relational expression}$
     641\end{cfa}
     642Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical ©&&© operator.
     643The scope of the declaration(s) is local to the ©if© statement but the declaration(s) exist within both the \emph{then} and \emph{else} clauses.
     644\CC only provides a single declaration always compared ©!=© to 0.
    628645
    629646
    630647%\section{\texorpdfstring{\protect\lstinline@case@ Clause}{case Clause}}
    631648\subsection{\texorpdfstring{\LstKeywordStyle{case} Clause}{case Clause}}
     649\label{s:caseClause}
    632650
    633651C restricts the ©case© clause of a ©switch© statement to a single value.
     
    640658\begin{cfa}
    641659switch ( i ) {
    642   case ®1, 3, 5®:
     660  case @1, 3, 5@:
    643661        ...
    644   case ®2, 4, 6®:
     662  case @2, 4, 6@:
    645663        ...
    646664}
     
    670688\begin{cfa}
    671689switch ( i ) {
    672   case ®1~5:® §\C{// 1, 2, 3, 4, 5}§
     690  case @1~5:@ $\C{// 1, 2, 3, 4, 5}$
    673691        ...
    674   case ®10~15:® §\C{// 10, 11, 12, 13, 14, 15}§
     692  case @10~15:@ $\C{// 10, 11, 12, 13, 14, 15}$
    675693        ...
    676694}
     
    678696Lists of subranges are also allowed.
    679697\begin{cfa}
    680 case ®1~5, 12~21, 35~42®:
     698case @1~5, 12~21, 35~42@:
    681699\end{cfa}
    682700
     
    722740if ( argc == 3 ) {
    723741        // open output file
    724         ®// open input file
    725 ®} else if ( argc == 2 ) {
    726         ®// open input file (duplicate)
    727 
    728 ®} else {
     742        @// open input file
     743@} else if ( argc == 2 ) {
     744        @// open input file (duplicate)
     745
     746@} else {
    729747        // usage message
    730748}
     
    733751\end{cquote}
    734752In this example, case 2 is always done if case 3 is done.
    735 This control flow is difficult to simulate with if statements or a ©switch© statement without fall-through as code must be duplicated or placed in a separate routine.
     753This control flow is difficult to simulate with ©if© statements or a ©switch© statement without fall-through as code must be duplicated or placed in a separate routine.
    736754C also uses fall-through to handle multiple case-values resulting in the same action:
    737755\begin{cfa}
    738756switch ( i ) {
    739   ®case 1: case 3: case 5:®     // odd values
     757  @case 1: case 3: case 5:@     // odd values
    740758        // odd action
    741759        break;
    742   ®case 2: case 4: case 6:®     // even values
     760  @case 2: case 4: case 6:@     // even values
    743761        // even action
    744762        break;
    745763}
    746764\end{cfa}
    747 However, this situation is handled in other languages without fall-through by allowing a list of case values.
    748 While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is unintuitive to many programmers and is different from virtually all other programming languages with a ©switch© statement.
     765This situation is better handled without fall-through by allowing a list of case values \see{\VRef{s:caseClause}}.
     766While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is unintuitive to many programmers and is different from most programming languages with a ©switch© statement.
    749767Hence, default fall-through semantics results in a large number of programming errors as programmers often \emph{forget} the ©break© statement at the end of a ©case© clause, resulting in inadvertent fall-through.
    750768
     
    756774        if ( j < k ) {
    757775                ...
    758           ®case 1:®             // transfer into "if" statement
     776          @case 1:@             // transfer into "if" statement
    759777                ...
    760778        } // if
     
    762780        while ( j < 5 ) {
    763781                ...
    764           ®case 3:®             // transfer into "while" statement
     782          @case 3:@             // transfer into "while" statement
    765783                ...
    766784        } // while
    767785} // switch
    768786\end{cfa}
    769 The problem with this usage is branching into control structures, which is known to cause both comprehension and technical difficulties.
    770 The comprehension problem occurs from the inability to determine how control reaches a particular point due to the number of branches leading to it.
     787This usage branches into control structures, which is known to cause both comprehension and technical difficulties.
     788The comprehension problem results from the inability to determine how control reaches a particular point due to the number of branches leading to it.
    771789The technical problem results from the inability to ensure declaration and initialization of variables when blocks are not entered at the beginning.
    772 There are no positive arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
     790There are few arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
    773791Nevertheless, C does have an idiom where this capability is used, known as ``\Index*{Duff's device}''~\cite{Duff83}:
    774792\begin{cfa}
     
    794812\item
    795813It is possible to place the ©default© clause anywhere in the list of labelled clauses for a ©switch© statement, rather than only at the end.
    796 Virtually all programming languages with a ©switch© statement require the ©default© clause to appear last in the case-clause list.
     814Most programming languages with a ©switch© statement require the ©default© clause to appear last in the case-clause list.
    797815The logic for this semantics is that after checking all the ©case© clauses without success, the ©default© clause is selected;
    798816hence, physically placing the ©default© clause at the end of the ©case© clause list matches with this semantics.
     
    803821\begin{cfa}
    804822switch ( x ) {
    805         ®int y = 1;® §\C{// unreachable initialization}§
    806         ®x = 7;® §\C{// unreachable code without label/branch}§
     823        @int y = 1;@ $\C{// unreachable initialization}$
     824        @x = 7;@ $\C{// unreachable code without label/branch}$
    807825  case 0: ...
    808826        ...
    809         ®int z = 0;® §\C{// unreachable initialization, cannot appear after case}§
     827        @int z = 0;@ $\C{// unreachable initialization, cannot appear after case}$
    810828        z = 2;
    811829  case 1:
    812         ®x = z;® §\C{// without fall through, z is uninitialized}§
     830        @x = z;@ $\C{// without fall through, z is uninitialized}$
    813831}
    814832\end{cfa}
    815833While the declaration of the local variable ©y© is useful with a scope across all ©case© clauses, the initialization for such a variable is defined to never be executed because control always transfers over it.
    816 Furthermore, any statements before the first ©case© clause can only be executed if labelled and transferred to using a ©goto©, either from outside or inside of the ©switch©, both of which are problematic.
    817 As well, the declaration of ©z© cannot occur after the ©case© because a label can only be attached to a statement, and without a fall through to case 3, ©z© is uninitialized.
    818 The key observation is that the ©switch© statement branches into control structure, \ie there are multiple entry points into its statement body.
     834Furthermore, any statements before the first ©case© clause can only be executed if labelled and transferred to using a ©goto©, either from outside or inside of the ©switch©, where both are problematic.
     835As well, the declaration of ©z© cannot occur after the ©case© because a label can only be attached to a statement, and without a fall-through to case 1, ©z© is uninitialized.
     836The key observation is that the ©switch© statement branches into a control structure, \ie there are multiple entry points into its statement body.
    819837\end{enumerate}
    820838
     
    842860Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called ©choose©, with no implicit fall-through semantics and an explicit fall-through if the last statement of a case-clause ends with the new keyword ©fallthrough©/©fallthru©, \eg:
    843861\begin{cfa}
    844 ®choose® ( i ) {
     862@choose@ ( i ) {
    845863  case 1:  case 2:  case 3:
    846864        ...
    847         ®// implicit end of switch (break)
    848   ®case 5:
     865        @// implicit end of switch (break)
     866  @case 5:
    849867        ...
    850         ®fallthru®; §\C{// explicit fall through}§
     868        @fallthru@; $\C{// explicit fall through}$
    851869  case 7:
    852870        ...
    853         ®break® §\C{// explicit end of switch (redundant)}§
     871        @break@ $\C{// explicit end of switch (redundant)}$
    854872  default:
    855873        j = 3;
    856874}
    857875\end{cfa}
    858 Like the ©switch© statement, the ©choose© statement retains the fall-through semantics for a list of ©case© clauses;
     876Like the ©switch© statement, the ©choose© statement retains the fall-through semantics for a list of ©case© clauses.
    859877An implicit ©break© is applied only at the end of the \emph{statements} following a ©case© clause.
    860878An explicit ©fallthru© is retained because it is a C-idiom most C programmers expect, and its absence might discourage programmers from using the ©choose© statement.
     
    872890\begin{cfa}
    873891switch ( x ) {
    874         ®int i = 0;® §\C{// allowed only at start}§
     892        @int i = 0;@ $\C{// allowed only at start}$
    875893  case 0:
    876894        ...
    877         ®int j = 0;® §\C{// disallowed}§
     895        @int j = 0;@ $\C{// disallowed}$
    878896  case 1:
    879897        {
    880                 ®int k = 0;® §\C{// allowed at different nesting levels}§
     898                @int k = 0;@ $\C{// allowed at different nesting levels}$
    881899                ...
    882           ®case 2:® §\C{// disallow case in nested statements}§
     900          @case 2:@ $\C{// disallow case in nested statements}$
    883901        }
    884902  ...
     
    897915  case 3:
    898916        if ( ... ) {
    899                 ... ®fallthru;® // goto case 4
     917                ... @fallthru;@ // goto case 4
    900918        } else {
    901919                ...
     
    912930choose ( ... ) {
    913931  case 3:
    914         ... ®fallthrough common;®
     932        ... @fallthrough common;@
    915933  case 4:
    916         ... ®fallthrough common;®
    917 
    918   ®common:® // below fallthrough
     934        ... @fallthrough common;@
     935
     936  @common:@ // below fallthrough
    919937                          // at case-clause level
    920938        ...     // common code for cases 3/4
     
    932950                for ( ... ) {
    933951                        // multi-level transfer
    934                         ... ®fallthru common;®
     952                        ... @fallthru common;@
    935953                }
    936954                ...
    937955        }
    938956        ...
    939   ®common:® // below fallthrough
     957  @common:@ // below fallthrough
    940958                          // at case-clause level
    941959\end{cfa}
     
    948966
    949967\begin{figure}
    950 \begin{tabular}{@{}l|l@{}}
    951 \multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
     968\begin{tabular}{@{}l@{\hspace{25pt}}|l@{}}
     969\multicolumn{1}{@{}c@{\hspace{25pt}}|}{loop control} & \multicolumn{1}{c@{}}{output} \\
    952970\hline
    953 \begin{cfa}[xleftmargin=0pt]
    954 while ®()® { sout | "empty"; break; }
    955 do { sout | "empty"; break; } while ®()®;
    956 for ®()® { sout | "empty"; break; }
    957 for ( ®0® ) { sout | "A"; } sout | "zero";
    958 for ( ®1® ) { sout | "A"; }
    959 for ( ®10® ) { sout | "A"; }
    960 for ( ®= 10® ) { sout | "A"; }
    961 for ( ®1 ~= 10 ~ 2® ) { sout | "B"; }
    962 for ( ®10 -~= 1 ~ 2® ) { sout | "C"; }
    963 for ( ®0.5 ~ 5.5® ) { sout | "D"; }
    964 for ( ®5.5 -~ 0.5® ) { sout | "E"; }
    965 for ( ®i; 10® ) { sout | i; }
    966 for ( ®i; = 10® ) { sout | i; }
    967 for ( ®i; 1 ~= 10 ~ 2® ) { sout | i; }
    968 for ( ®i; 10 -~= 1 ~ 2® ) { sout | i; }
    969 for ( ®i; 0.5 ~ 5.5® ) { sout | i; }
    970 for ( ®i; 5.5 -~ 0.5® ) { sout | i; }
    971 for ( ®ui; 2u ~= 10u ~ 2u® ) { sout | ui; }
    972 for ( ®ui; 10u -~= 2u ~ 2u® ) { sout | ui; }
     971\begin{cfa}
     972while @()@ { sout | "empty"; break; }
     973do { sout | "empty"; break; } while @()@;
     974for @()@ { sout | "empty"; break; }
     975for ( @0@ ) { sout | "A"; } sout | "zero";
     976for ( @1@ ) { sout | "A"; }
     977for ( @10@ ) { sout | "A"; }
     978for ( @= 10@ ) { sout | "A"; }
     979for ( @1 ~= 10 ~ 2@ ) { sout | "B"; }
     980for ( @10 -~= 1 ~ 2@ ) { sout | "C"; }
     981for ( @0.5 ~ 5.5@ ) { sout | "D"; }
     982for ( @5.5 -~ 0.5@ ) { sout | "E"; }
     983for ( @i; 10@ ) { sout | i; }
     984for ( @i; = 10@ ) { sout | i; }
     985for ( @i; 1 ~= 10 ~ 2@ ) { sout | i; }
     986for ( @i; 10 -~= 1 ~ 2@ ) { sout | i; }
     987for ( @i; 0.5 ~ 5.5@ ) { sout | i; }
     988for ( @i; 5.5 -~ 0.5@ ) { sout | i; }
     989for ( @ui; 2u ~= 10u ~ 2u@ ) { sout | ui; }
     990for ( @ui; 10u -~= 2u ~ 2u@ ) { sout | ui; }
    973991enum { N = 10 };
    974 for ( ®N® ) { sout | "N"; }
    975 for ( ®i; N® ) { sout | i; }
    976 for ( ®i; N -~ 0® ) { sout | i; }
     992for ( @N@ ) { sout | "N"; }
     993for ( @i; N@ ) { sout | i; }
     994for ( @i; N -~ 0@ ) { sout | i; }
    977995const int start = 3, comp = 10, inc = 2;
    978 for ( ®i; start ~ comp ~ inc + 1® ) { sout | i; }
    979 for ( i; 1 ~ ®@® ) { if ( i > 10 ) break; sout | i; }
    980 for ( i; 10 -~ ®@® ) { if ( i < 0 ) break; sout | i; }
    981 for ( i; 2 ~ ®@® ~ 2 ) { if ( i > 10 ) break; sout | i; }
    982 for ( i; 2.1 ~ ®@® ~ ®@® ) { if ( i > 10.5 ) break; sout | i; i += 1.7; }
    983 for ( i; 10 -~ ®@® ~ 2 ) { if ( i < 0 ) break; sout | i; }
    984 for ( i; 12.1 ~ ®@® ~ ®@® ) { if ( i < 2.5 ) break; sout | i; i -= 1.7; }
    985 for ( i; 5 ®:® j; -5 ~ @ ) { sout | i | j; }
    986 for ( i; 5 ®:® j; -5 -~ @ ) { sout | i | j; }
    987 for ( i; 5 ®:® j; -5 ~ @ ~ 2 ) { sout | i | j; }
    988 for ( i; 5 ®:® j; -5 -~ @ ~ 2 ) { sout | i | j; }
    989 for ( i; 5 ®:® j; -5 ~ @ ) { sout | i | j; }
    990 for ( i; 5 ®:® j; -5 -~ @ ) { sout | i | j; }
    991 for ( i; 5 ®:® j; -5 ~ @ ~ 2 ) { sout | i | j; }
    992 for ( i; 5 ®:® j; -5 -~ @ ~ 2 ) { sout | i | j; }
    993 for ( i; 5 ®:® j; -5 -~ @ ~ 2 ®:® k; 1.5 ~ @ ) { sout | i | j | k; }
    994 for ( i; 5 ®:® j; -5 -~ @ ~ 2 ®:® k; 1.5 ~ @ ) { sout | i | j | k; }
    995 for ( i; 5 ®:® k; 1.5 ~ @ ®:® j; -5 -~ @ ~ 2 ) { sout | i | j | k; }
     996for ( @i; start ~ comp ~ inc + 1@ ) { sout | i; }
     997for ( i; 1 ~ $\R{@}$ ) { if ( i > 10 ) break; sout | i; }
     998for ( i; 10 -~ $\R{@}$ ) { if ( i < 0 ) break; sout | i; }
     999for ( i; 2 ~ $\R{@}$ ~ 2 ) { if ( i > 10 ) break; sout | i; }
     1000for ( i; 2.1 ~ $\R{@}$ ~ $\R{@}$ ) { if ( i > 10.5 ) break; sout | i; i += 1.7; }
     1001for ( i; 10 -~ $\R{@}$ ~ 2 ) { if ( i < 0 ) break; sout | i; }
     1002for ( i; 12.1 ~ $\R{@}$ ~ $\R{@}$ ) { if ( i < 2.5 ) break; sout | i; i -= 1.7; }
     1003for ( i; 5 @:@ j; -5 ~ $@$ ) { sout | i | j; }
     1004for ( i; 5 @:@ j; -5 -~ $@$ ) { sout | i | j; }
     1005for ( i; 5 @:@ j; -5 ~ $@$ ~ 2 ) { sout | i | j; }
     1006for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 ) { sout | i | j; }
     1007for ( i; 5 @:@ j; -5 ~ $@$ ) { sout | i | j; }
     1008for ( i; 5 @:@ j; -5 -~ $@$ ) { sout | i | j; }
     1009for ( i; 5 @:@ j; -5 ~ $@$ ~ 2 ) { sout | i | j; }
     1010for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 ) { sout | i | j; }
     1011for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 @:@ k; 1.5 ~ $@$ ) { sout | i | j | k; }
     1012for ( i; 5 @:@ j; -5 -~ $@$ ~ 2 @:@ k; 1.5 ~ $@$ ) { sout | i | j | k; }
     1013for ( i; 5 @:@ k; 1.5 ~ $@$ @:@ j; -5 -~ $@$ ~ 2 ) { sout | i | j | k; }
    9961014\end{cfa}
    9971015&
     
    10561074\subsection{Loop Control}
    10571075
    1058 The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
    1059 \begin{itemize}
     1076Looping a fixed number of times, possibly with a loop index, occurs frequently.
     1077\CFA condenses simple looping to facilitate coding speed and safety.
     1078The ©for©/©while©/©do-while© loop-control is augmented as follows \see{examples in \VRef[Figure]{f:LoopControlExamples}}:
     1079\begin{itemize}[itemsep=0pt]
     1080\item
     1081©0© is the implicit start value;
     1082\item
     1083©1© is the implicit increment value.
     1084\item
     1085The up-to range uses operator ©+=© for increment;
     1086\item
     1087The down-to range uses operator ©-=© for decrement.
    10601088\item
    10611089The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.
     1090\begin{cfa}
     1091for ( i; @5@ )                                  $\C[2.5in]{// typeof(5) i; 5 is comparison value}$
     1092for ( i; @1.5@~5.5~0.5 )                $\C{// typeof(1.5) i; 1.5 is start value}$
     1093\end{cfa}
    10621094\item
    10631095An empty conditional implies a comparison value of ©1© (true).
    1064 \item
    1065 A comparison N is implicit up-to exclusive range [0,N©®)®©.
    1066 \item
    1067 A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©.
    1068 \item
    1069 The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©.
    1070 \item
    1071 The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©.
    1072 \item
    1073 The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©.
    1074 \item
    1075 The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©.
    1076 \item
    1077 ©0© is the implicit start value;
    1078 \item
    1079 ©1© is the implicit increment value.
    1080 \item
    1081 The up-to range uses operator ©+=© for increment;
    1082 \item
    1083 The down-to range uses operator ©-=© for decrement.
     1096\begin{cfa}
     1097while ( $\R{/*empty*/}$ )               $\C{// while ( true )}$
     1098for ( $\R{/*empty*/}$ )                 $\C{// for ( ; true; )}$
     1099do ... while ( $\R{/*empty*/}$ ) $\C{// do ... while ( true )}$
     1100\end{cfa}
     1101\item
     1102A comparison N is implicit up-to exclusive range [0,N\R{)}.
     1103\begin{cfa}
     1104for ( @5@ )                                             $\C{// for ( typeof(5) i; i < 5; i += 1 )}$
     1105\end{cfa}
     1106\item
     1107A comparison ©=© N is implicit up-to inclusive range [0,N\R{]}.
     1108\begin{cfa}
     1109for ( @=@5 )                                    $\C{// for ( typeof(5) i; i <= 5; i += 1 )}$
     1110\end{cfa}
     1111\item
     1112The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N\R{)}.
     1113\begin{cfa}
     1114for ( 1@~@5 )                                   $\C{// for ( typeof(1) i = 1; i < 5; i += 1 )}$
     1115\end{cfa}
     1116\item
     1117The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N\R{]}.
     1118\begin{cfa}
     1119for ( 1@~=@5 )                                  $\C{// for ( typeof(1) i = 1; i <= 5; i += 1 )}$
     1120\end{cfa}
     1121\item
     1122The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M\R{)}.
     1123\begin{cfa}
     1124for ( 1@-~@5 )                                  $\C{// for ( typeof(1) i = 5; i > 0; i -= 1 )}$
     1125\end{cfa}
     1126\item
     1127The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M\R{]}.
     1128\begin{cfa}
     1129for ( 1@-~=@5 )                                 $\C{// for ( typeof(1) i = 5; i >= 0; i -= 1 )}$
     1130\end{cfa}
    10841131\item
    10851132©@© means put nothing in this field.
     1133\begin{cfa}
     1134for ( 1~$\R{@}$~2 )                             $\C{// for ( typeof(1) i = 1; /*empty*/; i += 2 )}$
     1135\end{cfa}
    10861136\item
    10871137©:© means start another index.
     1138\begin{cfa}
     1139for ( i; 5 @:@ j; 2~12~3 )              $\C{// for ( typeof(5) i = 0, j = 2; i < 5 \&\& j < 12; i += 1, j += 3 )}\CRT$
     1140\end{cfa}
    10881141\end{itemize}
    10891142
     
    11041157\begin{lrbox}{\myboxA}
    11051158\begin{cfa}[tabsize=3]
    1106 ®Compound:® {
    1107         ®Try:® try {
    1108                 ®For:® for ( ... ) {
    1109                         ®While:® while ( ... ) {
    1110                                 ®Do:® do {
    1111                                         ®If:® if ( ... ) {
    1112                                                 ®Switch:® switch ( ... ) {
     1159@Compound:@ {
     1160        @Try:@ try {
     1161                @For:@ for ( ... ) {
     1162                        @While:@ while ( ... ) {
     1163                                @Do:@ do {
     1164                                        @If:@ if ( ... ) {
     1165                                                @Switch:@ switch ( ... ) {
    11131166                                                        case 3:
    1114                                                                 ®break Compound®;
    1115                                                                 ®break Try®;
    1116                                                                 ®break For®;      /* or */  ®continue For®;
    1117                                                                 ®break While®;  /* or */  ®continue While®;
    1118