source: tests/zombies/string-perf/make-corpus.cfa @ d6c5faa

Last change on this file since d6c5faa was 08ed947, checked in by Michael Brooks <mlbrooks@…>, 3 years ago

Roll up of string changes for performance testing/improvement, and a couple API features supporting them.

String API changes:
Defining a tuning knob to control the heap growth policy (replaces the former 10% hardcode, which is downgraded to a default)
Implementing findFrom (allowing find-non-first); leaving find as find-first.

String implementation perf improvements:
Calling C-malloc directly instead of via CFA-alloc.
Replacing loops that copy with memmove calls.
Replacing loops that search for a value with memchr calls.

String perf testing realized:
Makefile supporting several prog-*.cfa, chosen by OPERATION value (implies prog.cfa changes to support the adjusted protocol)
Adjusting the starter/accumulator declarations in PEQ and PTA to behave consistently in cfa v cpp.
Adding tests: allocation, find, normalize, pass-by-val, pass-by-x.
Adding helper shell scripts for: generating flame graphs, collecting/crunching allocation stats using Mubeen's malloc wrappers

  • Property mode set to 100644
File size: 2.5 KB
Line 
1#include <stdlib.hfa>
2#include <math.h>
3#include <limits.h>
4#include <unistd.h>
5#include <string.h>
6
// U(0,1): one pseudo-random draw, uniform on the OPEN interval (0,1).
//
// rand() yields an integer in [0, RAND_MAX]; shifting by one and dividing by
// RAND_MAX + 2 keeps the result strictly between 0 and 1.  Both endpoints are
// excluded on purpose: callers take log() of the result, and log(0) is -inf.
// The arithmetic is done in double so RAND_MAX + 2 cannot overflow.
static double U() {
    return ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
}
11
// Support for random draws from a geometric distribution with a given mean.
// https://math.stackexchange.com/questions/485448/prove-the-way-to-generate-geometrically-distributed-random-numbers
//
// denom caches ln(1-p), where p = 1/mean is the per-trial success probability.
// It is written once by initialize() and read by every later draw.
static double denom;

// Seed the C random number generator from the process id and precompute denom.
// mean: desired mean of the geometric distribution.  With mean == 1 the
//       argument of log() is 0, so denom becomes -infinity.
static void initialize(int mean) {
    srand(getpid());
    denom = log(1.0 - 1.0 / (double) mean);
}
// Return one draw (always >= 1) from the geometric distribution configured by
// initialize(), which must have been called first so that denom is set.
static int nextGeoRand() {
    // Redraw if the uniform sample is not strictly positive: log(0) is
    // -infinity, and converting the resulting infinite/NaN quotient to int
    // is undefined behaviour.
    double u;
    do {
        u = U();
    } while (u <= 0.0);

    // ret = 1 + ⌊ln(U)/ln(1−p)⌋ where U ~ U(0, 1)
    return 1 + (int) (log(u) / denom);
}
24
// Write one randomly generated line to stdout.
//
// The line holds offset + nextGeoRand() characters followed by a newline.
// Each character is, independently:
//   - mchar, with probability mcfreq;
//   - otherwise a lowercase letter drawn from the full range 'a'..'z'.
//
// offset: constant added to the geometric draw to get the line length
// mcfreq: probability that a given position holds mchar
// mchar:  the "magic" character
static void emit1( int offset, double mcfreq, char mchar ) {
    int lim = offset + nextGeoRand();
    for (i; lim) {
        char emit;
        if (U() < mcfreq) emit = mchar;
        // 'z' - 'a' + 1 == 26 letters; without the +1, 'z' could never be produced
        else emit = 'a' + (rand() % ('z' - 'a' + 1));
        printf("%c", emit);
    }
    printf("\n");
}
37
// usage: ./make-corpus toGen mean [offset=0] [mcfreq=0.0] [mchar='-']
//
// Outputs alphabetic (plus magic-char) strings, one per line.
// toGen:  number of strings (lines); must satisfy 0 < toGen < 1000000
// mean:   mean of the geometric part of the length; must satisfy 0 < mean < 1000
// offset: constant added to every length; must satisfy 0 <= offset < 10000
//
// generated length ~  offset + geo(mean)
//                  >= 1
//
// offset=0,  mean=1:  constant length 1
// offset=0,  mean=2:  lengths go like number of coin tosses it takes to get heads
// offset=0,  mean=6:  lengths go like number of cube die rolls it takes to get :::
// offset=15, mean=1:  constant length 16
// offset=15, mean=2:  population's minimum is 16 and mean is 17
//
// Magic Char (mc) does not affect these lengths.  Any mc occurrence replaces an alphabetic char.
// mcfreq: (in [0,1]) expected fraction of the characters output that are mchar
// mchar:  the magic character; the argument must be exactly one character long
//
int main(int argc, char ** argv) {

    int toGen;
    int mean;
    int offset = 0;
    double mcfreq = 0.0;
    char mchar = '-';

    assert(argc >= 3 && argc <= 6);

    // Each case deliberately falls through to the next, so supplying a later
    // optional argument also parses every argument that precedes it.
    switch(argc) {
        case 6:
            // the magic char must be a single character (an empty string would make it NUL)
            assert(strlen(argv[5]) == 1);
            mchar = argv[5][0];
            // fall through
        case 5:
            mcfreq = atof(argv[4]);
            assert(mcfreq >= 0.0 && mcfreq <= 1.0);
            // fall through
        case 4:
            offset = atoi(argv[3]);
            assert(offset >= 0 && offset < 10000);
            // fall through
        default:
            mean = atoi(argv[2]);
            assert(mean > 0);
            assert(mean < 1000);
            toGen = atoi(argv[1]);
            assert(toGen > 0);
            assert(toGen < 1000000);
    }

    initialize(mean);
    for( i; toGen ) {
        emit1(offset, mcfreq, mchar);
    }
}
Note: See TracBrowser for help on using the repository browser.