#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

// Corpus generator: prints `toGen` lines, each a random lowercase string whose
// length is drawn from a geometric distribution with the requested mean.
//
// Geometric sampling by inversion:
// https://math.stackexchange.com/questions/485448/prove-the-way-to-generate-geometrically-distributed-random-numbers

// ln(1 - p) with p = 1/mean. Written by initialize(), read by nextGeoRand().
static double denom;

// Seed the C PRNG from the process id and precompute the sampler's denominator.
// For mean == 1, p == 1 and denom becomes -infinity; nextGeoRand() then always
// returns 1 because a finite numerator divided by -infinity is zero.
static void initialize(int mean) {
    srand((unsigned) getpid());
    double p = 1.0 / (double) mean;
    denom = log(1.0 - p);
}

// Return one draw (>= 1) from the geometric distribution set up by initialize().
// ret = 1 + floor(ln(U) / ln(1 - p)), where U is uniform on (0, 1].
static int nextGeoRand(void) {
    // Shift rand() from [0, RAND_MAX] to [1, RAND_MAX + 1] so U is never 0:
    // log(0) is -infinity, and converting an infinite quotient to int is undefined.
    // The arithmetic is done in double, so RAND_MAX + 1 cannot overflow.
    double U = ((double) rand() + 1.0) / ((double) RAND_MAX + 1.0);
    return 1 + (int) (log(U) / denom);
}

// Write one line to stdout: a random string over 'a'..'z' (inclusive) whose
// length is drawn from nextGeoRand(), followed by a newline.
static void emit1(void) {
    enum { ALPHABET_SIZE = 'z' - 'a' + 1 };  // 26: includes 'z' itself
    int lim = nextGeoRand();
    for (int i = 0; i < lim; i++) {
        putchar('a' + rand() % ALPHABET_SIZE);
    }
    putchar('\n');
}

// Parse `text` as a base-10 integer strictly between `lo` and `hi`.
// Returns 1 and stores the value in *out on success; returns 0 (leaving *out
// untouched) on empty input, trailing junk, overflow, or an out-of-range value.
static int parseBounded(const char *text, long lo, long hi, int *out) {
    char *end;
    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return 0;
    }
    if (value <= lo || value >= hi) {
        return 0;
    }
    *out = (int) value;
    return 1;
}

// usage: ./make-corpus toGen mean
//   toGen: number of lines to generate, 0 < toGen < 1000000
//   mean:  mean line length,            0 < mean  < 1000
int main(int argc, char **argv) {
    int toGen = 0;
    int mean = 0;

    // Explicit checks rather than assert(): they must survive NDEBUG builds,
    // otherwise argv[1]/argv[2] would be read without verifying argc.
    if (argc != 3
        || !parseBounded(argv[1], 0, 1000000, &toGen)
        || !parseBounded(argv[2], 0, 1000, &mean)) {
        fprintf(stderr,
                "usage: %s toGen mean   (0 < toGen < 1000000, 0 < mean < 1000)\n",
                argc > 0 ? argv[0] : "make-corpus");
        return EXIT_FAILURE;
    }

    initialize(mean);
    for (int i = 0; i < toGen; i++) {
        emit1();
    }
    return EXIT_SUCCESS;
}