Ignore:
Timestamp:
Feb 23, 2022, 6:13:02 PM (3 years ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
Children:
afe9e45, c5af4f9
Parents:
cc7bbe6
Message:

Roll up of string changes for performance testing/improvement, and a couple API features supporting them.

String API changes:
Defining a tuning knob to control the heap growth policy (replaces the former hard-coded 10%, which is now just the default)
Implementing findFrom (allowing find-non-first); leaving find as find-first.

String implementation perf improvements:
Calling C-malloc directly instead of via CFA-alloc.
Replacing loops that copy with memmove calls.
Replacing loops that search for a value with memchr calls.

String perf testing realized:
Makefile supporting several prog-*.cfa, chosen by OPERATION value (implies prog.cfa changes to support the adjusted protocol)
Adjusting the starter/accumulator declarations in PEQ and PTA to behave consistently in cfa vs. cpp.
Adding tests: allocation, find, normalize, pass-by-val, pass-by-x.
Adding helper shell scripts for: generating flame graphs, collecting/crunching allocation stats using Mubeen's malloc wrappers

File:
1 edited

Legend:

Unmodified
Added
Removed
  • tests/zombies/string-perf/make-corpus.cfa

    rcc7bbe6 r08ed947  
    33#include <limits.h>
    44#include <unistd.h>
     5#include <string.h>
     6
     7// U(0,1)
     8static double U() {
     9    return (double)rand() / (double)INT_MAX;
     10}
    511
    612// generate random draws from a geometric distribution of the given mean
     
    1420static int nextGeoRand() {
    1521    // ret = ⌊ln(U)/ln(1−p)⌋ where U ~ U(0, 1)
    16     double U = (double)rand() / (double)INT_MAX;
    17     return 1 + (int) (log(U) / denom);
     22    return 1 + (int) (log(U()) / denom);
    1823}
    1924
    20 // write a randomly generated alphabetic string whose length is drawn from above distribution
    21 static void emit1() {
    22     int lim = nextGeoRand();
     25// write a randomly generated alphabetic string whose length is adjusted from a draw of the above distribution
     26static void emit1( int offset, double mcfreq, char mchar ) {
     27    int lim = offset + nextGeoRand();
    2328    // printf("==%d\n", lim);
    2429    for (i; lim) {
    25         char emit = 'a' + (rand() % ('z'-'a'));
     30        char emit;
     31        if (U() < mcfreq) emit = mchar;
     32        else emit = 'a' + (rand() % ('z'-'a'));
    2633        printf("%c", emit);
    2734    }
     
    2936}
    3037
    31 // usage: ./make-corpus toGen mean
     38// usage: ./make-corpus toGen mean [offset=0] [mcfreq=0.0] [mchar='-']
     39//
     40// Outputs alphabetic (plus magic-char) strings, one per line.
     41// toGen: number of strings (lines)
     42//
     43// generated length ~  offset + geo(mean)
     44//                  >= 1
     45//
     46// offset=0,  mean=1:  constant length 1
     47// offset=0,  mean=2:  lengths go like number of coin tosses it takes to get heads
     48// offset=0,  mean=6:  lengths go like number of cube die rolls it takes to get :::
     49// offset=15, mean=1:  constant length 16
     50// offset=15, mean=2:  population's minimum is 16 and mean is 17
     51//
     52// Magic Char (mc) does not affect these lengths.  Any mc occurrence replaces an alphabetic char.
     53// mcfreq: (in [0,1]) expected fraction of the characters output that are mchar
     54//
    3255int main(int argc, char ** argv) {
    33     assert(argc == 3);
    3456
    35     int toGen = atoi(argv[1]);
    36     assert(toGen > 0);
    37     assert(toGen < 1000000);
     57    int toGen;
     58    int mean;
     59    int offset = 0;
     60    double mcfreq = 0.0;
     61    char mchar = '-';
    3862
    39     int mean = atoi(argv[2]);
    40     assert(mean > 0);
    41     assert(mean < 1000);
     63    assert(argc >= 3 && argc <= 6);
     64    switch(argc) {
     65        case 6:
     66            assert(strlen(argv[5]) == 0);
     67            mchar = argv[5][0];
     68        case 5:
     69            mcfreq = atof(argv[4]);
     70            assert(mcfreq >= 0.0 && mcfreq <= 1.0);
     71        case 4:
     72            offset = atoi(argv[3]);
     73            assert(offset >= 0 && offset < 10000);
     74        default:
     75            mean = atoi(argv[2]);
     76            assert(mean > 0);
     77            assert(mean < 1000);
     78            toGen = atoi(argv[1]);
     79            assert(toGen > 0);
     80            assert(toGen < 1000000);
     81    }
    4282
    4383    initialize(mean);
    4484    for( i; toGen ) {
    45         emit1();
     85        emit1(offset, mcfreq, mchar);
    4686    }
    4787}
Note: See TracChangeset for help on using the changeset viewer.