Ignore:
Timestamp:
Feb 23, 2022, 6:13:02 PM (3 years ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
ADT, ast-experimental, enum, master, pthread-emulation, qualifiedEnum
Children:
afe9e45, c5af4f9
Parents:
cc7bbe6
Message:

Roll up of string changes for performance testing/improvement, and a couple API features supporting them.

String API changes:
Defining a tuning knob to control the heap growth policy (replaces the former hard-coded 10%, which is downgraded to a default)
Implementing findFrom (allowing find-non-first); leaving find as find-first.

String implementation perf improvements:
Calling C-malloc directly instead of via CFA-alloc.
Replacing loops that copy with memmove calls.
Replacing loops that search for a value with memchr calls.

String perf testing realized:
Makefile supporting several prog-*.cfa, chosen by OPERATION value (implies prog.cfa changes to support the adjusted protocol)
Adjusting the starter/accumulator declarations in PEQ and PTA to behave consistently in cfa vs. cpp.
Adding tests: allocation, find, normalize, pass-by-val, pass-by-x.
Adding helper shell scripts for: generating flame graphs, collecting/crunching allocation stats using Mubeen's malloc wrappers

Location:
tests/zombies/string-perf
Files:
13 added
4 edited

Legend:

Unmodified
Added
Removed
  • tests/zombies/string-perf/.gitignore

    rcc7bbe6 r08ed947  
    11!Makefile
    22perfexp-*
     3make-corpus
     4corpus-*.txt
     5corpusx-*.txt
     6measurement-*.csv
  • tests/zombies/string-perf/Makefile

    rcc7bbe6 r08ed947  
    55CFA = $(CFABUILD)/driver/cfa
    66PERFFLAGS_CFA = -nodebug -O2
    7 PERFFLAGS_CXX = -DNDEBUG -O2
     7PERFFLAGS_CXX = -DNDEBUG -O2 -Wl,--no-as-needed -ldl
    88
    99
     
    3737endef
    3838
    39 OPERATIONS=pta peq
     39OPERATIONS=pta peq pbv pall pno
    4040ALLOCS=reuse fresh
    4141CFA_APILEVELS=hl ll
     
    4444
    4545ifneq ($(filter cfa,$(PLATFORMS)),)
    46         CFA_PERFPROGS=$(call hyphCross5,perfexp-cfa,$(CFA_APILEVELS),$(OPERATIONS),$(CFA_SHARINGS),$(ALLOCS))
     46    CFA_APIS=$(call hyphCross,$(CFA_APILEVELS),$(CFA_SHARINGS))
    4747endif
    4848
    4949ifneq ($(filter stl,$(PLATFORMS)),)
    50         STL_PERFPROGS=$(call hyphCross3,perfexp-stl,$(OPERATIONS),$(ALLOCS))
     50    STL_APIS=na-na
    5151endif
    5252
    5353ifneq ($(filter buhr94,$(PLATFORMS)),)
    54         BUHR94_PERFPROGS=$(call hyphCross3,perfexp-buhr94,$(OPERATIONS),$(ALLOCS))
     54    BUHR94_APIS=na-na
    5555endif
     56
     57APIS = $(CFA_APIS) $(STL_APIS) $(BUHR94_APIS)
     58
     59OPERATIONS_USING_ALLOCS=pta peq
     60define enrichOperationsAllocs
     61$(call hyphCross3,$(filter peq pta,$(OPERATIONS)),$(1),$(ALLOCS)) $(call hyphCross3,$(filter-out peq pta,$(OPERATIONS)),$(1),na)
     62endef
     63
     64CFA_PERFPROGS=$(call hyphCross,perfexp-cfa,$(call enrichOperationsAllocs,$(CFA_APIS)))
     65STL_PERFPROGS=$(call hyphCross,perfexp-stl,$(call enrichOperationsAllocs,$(STL_APIS)))
     66BUHR94_PERFPROGS=$(call hyphCross,perfexp-buhr94,$(call enrichOperationsAllocs,$(BUHR94_APIS)))
    5667
    5768PERFPROGS = $(CFA_PERFPROGS) $(STL_PERFPROGS) $(BUHR94_PERFPROGS)
     
    5970all : $(PERFPROGS)
    6071
     72PP_SPLIT := $(shell echo "${PERFPROGS}" | sed -e 's/ /\\n/g')
     73echoPerfProgs:
     74        echo -e '$(PP_SPLIT)'
    6175
     76perfexp-%.o: API=$(call ucHyphProj,$@,2)
     77perfexp-%.o: OPERATION=$(call ucHyphProj,$@,3)
     78perfexp-%.o: CFA_APILEVEL=$(call ucHyphProj,$@,4)
     79perfexp-%.o: CFA_SHARING=$(call ucHyphProj,$@,5)
     80perfexp-%.o: ALLOC=$(call ucHyphProj,$@,6)
     81perfexp-%.o: SCENARIO_SWITCH=-DIMPL_$(API)_$(CFA_APILEVEL)_$(CFA_SHARING) -DOP_$(OPERATION) -DALLOC_$(ALLOC)
    6282
    63 perfexp-cfa-%: CFA_APILEVEL=$(call ucHyphProj,$@,3)
    64 perfexp-cfa-%: OPERATION=$(call ucHyphProj,$@,4)
    65 perfexp-cfa-%: CFA_SHARING=$(call ucHyphProj,$@,5)
    66 perfexp-cfa-%: ALLOC=$(call ucHyphProj,$@,6)
    67 perfexp-cfa-%: prog.cfa $(LIBCFA)
    68         $(CFA) $(PERFFLAGS_CFA) $< -o $@ -DIMPL_CFA_$(CFA_APILEVEL)_$(CFA_SHARING) -DOP_$(OPERATION) -DALLOC_$(ALLOC)
     83perfexp-cfa-%.o: CMD=$(CFA) -c $(PERFFLAGS_CFA) $< -o $@ $(SCENARIO_SWITCH)
     84perfexp-stl-%.o: CMD=$(CXX) -c -xc++ $(PERFFLAGS_CXX) $< -o $@ $(SCENARIO_SWITCH)
     85perfexp-buhr94-%.o: CMD=$(CXX) -xc++ -c $(PERFFLAGS_CXX) $< -o $@ $(SCENARIO_SWITCH)
    6986
    70 perfexp-stl-%: OPERATION=$(call ucHyphProj,$@,3)
    71 perfexp-stl-%: ALLOC=$(call ucHyphProj,$@,4)
    72 perfexp-stl-%: prog.cfa
    73         $(CXX) -xc++ $(PERFFLAGS_CXX) $< -o $@ -DIMPL_STL -DOP_$(OPERATION) -DALLOC_$(ALLOC)
     87perfexp-cfa-peq-%.o: prog.cfa $(LIBCFA)
     88        $(CMD)
     89perfexp-cfa-pta-%.o: prog.cfa $(LIBCFA)
     90        $(CMD)
     91perfexp-cfa-pbv-%.o: prog-passbyval.cfa $(LIBCFA)
     92        $(CMD)
     93perfexp-cfa-pb%.o: prog-passbyX.cfa $(LIBCFA)
     94        $(CMD)
     95perfexp-cfa-pfi-%.o: prog-find.cfa $(LIBCFA)
     96        $(CMD)
     97perfexp-cfa-pall-%.o: prog-allocn.cfa $(LIBCFA)
     98        $(CMD)
     99perfexp-cfa-pno-%.o: prog-normalize.cfa $(LIBCFA)
     100        $(CMD)
     101perfexp-stl-peq-%.o: prog.cfa
     102        $(CMD)
     103perfexp-stl-pta-%.o: prog.cfa
     104        $(CMD)
     105perfexp-stl-pbv-%.o: prog-passbyval.cfa
     106        $(CMD)
     107perfexp-stl-pfi-%.o: prog-find.cfa
     108        $(CMD)
     109perfexp-stl-pall-%.o: prog-allocn.cfa
     110        $(CMD)
     111perfexp-stl-pno-%.o: prog-normalize.cfa
     112        $(CMD)
     113perfexp-buhr94-peq-%.o: prog.cfa buhr94-string.o buhr94-VbyteSM.o
     114        $(CMD)
     115perfexp-buhr94-pta-%.o: prog.cfa buhr94-string.o buhr94-VbyteSM.o
     116        $(CMD)
     117perfexp-buhr94-pta-%.o: prog-passbyval.cfa buhr94-string.o buhr94-VbyteSM.o
     118        $(CMD)
     119perfexp-buhr94-pall-%.o: prog-allocn.cfa buhr94-string.o buhr94-VbyteSM.o
     120        $(CMD)
     121perfexp-buhr94-pno-%.o: prog-normalize.cfa buhr94-string.o buhr94-VbyteSM.o
     122        $(CMD)
    74123
    75 perfexp-buhr94-%.o: OPERATION=$(call ucHyphProj,$@,3)
    76 perfexp-buhr94-%.o: ALLOC=$(call ucHyphProj,$@,4)
    77 perfexp-buhr94-%.o: prog.cfa
    78         $(CXX) -xc++ -c $(PERFFLAGS_CXX) $< -o $@ -DIMPL_BUHR94 -DOP_$(OPERATION) -DALLOC_$(ALLOC)
     124# one of the pbx cases also needs to link with not_string_res.o (handling manually)
     125perfexp-cfa-%: perfexp-cfa-%.o $(LIBCFA)
     126        $(CFA) $(PERFFLAGS_CFA) $< -o $@ 
     127perfexp-stl-%: perfexp-stl-%.o $(LIBCFA)
     128        $(CFA) $(PERFFLAGS_CFA) $< /lib/x86_64-linux-gnu/libstdc++.so.6 -o $@
     129perfexp-buhr94-% : perfexp-buhr94-%.o buhr94-string.o buhr94-VbyteSM.o
     130        $(CXX) $(PERFFLAGS_CXX) $^ -o $@
    79131
    80132buhr94-string.o:
     
    83135buhr94-VbyteSM.o:
    84136        $(CXX) -xc++ -c $(PERFFLAGS_CXX) ~/usys1/sm/string/StringSharing/src/VbyteSM.cc -o $@
    85 
    86 perfexp-buhr94-% : perfexp-buhr94-%.o buhr94-string.o buhr94-VbyteSM.o
    87         $(CXX) $(PERFFLAGS_CXX) $^ -o $@
    88137
    89138clean:
     
    104153                done ; \
    105154        done
     155#                       printed=`./$$prog 10000 - 10 $$corpusbody` ; \
     156
     157CFA_EXPANSIONS=0.02 0.05 0.1 0.2 0.5 0.9
     158
     159measurement2: $(MEASURE)
     160        tofile=measurement-`date '+%F--%H-%M-%S'`.csv ; \
     161        for prog in $(MEASURE) ; do \
     162            for corpus in $(CORPORI) ; do \
     163                        for expansion in $(CFA_EXPANSIONS) ; do \
     164                                corpusbody= ; \
     165                                echo ./$$prog 1000 1.006 $$expansion 10 \`cat $$corpus\` ; \
     166                        done ; \
     167                done ; \
     168        done ; \
     169        echo $$tofile ; \
     170        for prog in $(MEASURE) ; do \
     171            for corpus in $(CORPORI) ; do \
     172                        for expansion in $(CFA_EXPANSIONS) ; do \
     173                                corpusbody=`cat $$corpus` ; \
     174                                printed=`./$$prog 1000 1.006 $$expansion 10 $$corpusbody` ; \
     175                                echo $$prog,$$corpus,$$expansion,$$printed  >>  $$tofile ; \
     176                                echo $$prog,$$corpus,$$expansion,$$printed  ; \
     177                        done ; \
     178                done ; \
     179        done
     180
     181measurement3: $(MEASURE)
     182        for prog in $(MEASURE) ; do \
     183            for corpus in $(CORPORI) ; do \
     184                        for expansion in $(CFA_EXPANSIONS) ; do \
     185                                corpusbody=`cat $$corpus` ; \
     186                                LD_PRELOAD=~/plg2/mubeen-stat-shim/malloc/mallocWrappers.so ./$$prog 1000 1.006 $$expansion 1 $$corpusbody ; \
     187                                mv preload_dump.txt preload_dump--qrun1--$$corpus--expansion-$$expansion.txt ; \
     188                        done ; \
     189                done ; \
     190        done
  • tests/zombies/string-perf/make-corpus.cfa

    rcc7bbe6 r08ed947  
    33#include <limits.h>
    44#include <unistd.h>
     5#include <string.h>
     6
     7// U(0,1)
     8static double U() {
     9    return (double)rand() / (double)INT_MAX;
     10}
    511
    612// generate random draws from a geometric distribution of the given mean
     
    1420static int nextGeoRand() {
    1521    // ret = ⌊ln(U)/ln(1−p)⌋ where U ~ U(0, 1)
    16     double U = (double)rand() / (double)INT_MAX;
    17     return 1 + (int) (log(U) / denom);
     22    return 1 + (int) (log(U()) / denom);
    1823}
    1924
    20 // write a randomly generated alphabetic string whose length is drawn from above distribution
    21 static void emit1() {
    22     int lim = nextGeoRand();
     25// write a randomly generated alphabetic string whose length is adjusted from a draw of the above distribution
     26static void emit1( int offset, double mcfreq, char mchar ) {
     27    int lim = offset + nextGeoRand();
    2328    // printf("==%d\n", lim);
    2429    for (i; lim) {
    25         char emit = 'a' + (rand() % ('z'-'a'));
     30        char emit;
     31        if (U() < mcfreq) emit = mchar;
     32        else emit = 'a' + (rand() % ('z'-'a'));
    2633        printf("%c", emit);
    2734    }
     
    2936}
    3037
    31 // usage: ./make-corpus toGen mean
     38// usage: ./make-corpus toGen mean [offset=0] [mcfreq=0.0] [mchar='-']
     39//
     40// Outputs alphabetic (plus magic-char) strings, one per line.
     41// toGen: number of strings (lines)
     42//
     43// generated length ~  offset + geo(mean)
     44//                  >= 1
     45//
     46// offset=0,  mean=1:  constant length 1
     47// offset=0,  mean=2:  lengths go like number of coin tosses it takes to get heads
     48// offset=0,  mean=6:  lengths go like number of cube die rolls it takes to get :::
     49// offset=15, mean=1:  constant length 16
     50// offset=15, mean=2:  population's minimum is 16 and mean is 17
     51//
     52// Magic Char (mc) does not affect these lengths.  Any mc occurrence replaces an alphabetic char.
     53// mcfreq: (in [0,1]) expected fraction of the characters output that are mchar
     54//
    3255int main(int argc, char ** argv) {
    33     assert(argc == 3);
    3456
    35     int toGen = atoi(argv[1]);
    36     assert(toGen > 0);
    37     assert(toGen < 1000000);
     57    int toGen;
     58    int mean;
     59    int offset = 0;
     60    double mcfreq = 0.0;
     61    char mchar = '-';
    3862
    39     int mean = atoi(argv[2]);
    40     assert(mean > 0);
    41     assert(mean < 1000);
     63    assert(argc >= 3 && argc <= 6);
     64    switch(argc) {
     65        case 6:
     66            assert(strlen(argv[5]) == 0);
     67            mchar = argv[5][0];
     68        case 5:
     69            mcfreq = atof(argv[4]);
     70            assert(mcfreq >= 0.0 && mcfreq <= 1.0);
     71        case 4:
     72            offset = atoi(argv[3]);
     73            assert(offset >= 0 && offset < 10000);
     74        default:
     75            mean = atoi(argv[2]);
     76            assert(mean > 0);
     77            assert(mean < 1000);
     78            toGen = atoi(argv[1]);
     79            assert(toGen > 0);
     80            assert(toGen < 1000000);
     81    }
    4282
    4383    initialize(mean);
    4484    for( i; toGen ) {
    45         emit1();
     85        emit1(offset, mcfreq, mchar);
    4686    }
    4787}
  • tests/zombies/string-perf/prog.cfa

    rcc7bbe6 r08ed947  
     1#if defined IMPL_STL_NA_NA
     2  #define IMPL_STL
     3#endif
     4
     5#if defined IMPL_BUHR94_NA_NA
     6  #define IMPL_BUHR94
     7#endif
    18
    29#if defined IMPL_STL
     
    5259#if defined IMPL_CFA
    5360  #include <math.hfa>
     61  extern "C" {
     62    void malloc_stats( void );
     63  }
    5464#elif defined IMPL_CXX
    5565  #include <algorithm>
    5666  using std::min;
     67  #include <malloc.h>
    5768#endif
    5869
     
    114125    #if defined IMPL_CFA_LL
    115126      #define DECLS \
    116         string_res initval = "starter"; \
    117         string_res accum = { initval, COPY_VALUE };
     127        const char* initval = "starter"; \
     128        string_res accum = initval;
    118129    #else
    119130      #define DECLS \
    120         string initval = "starter"; \
     131        const char* initval = "starter"; \
    121132        string accum = initval;
    122133    #endif
     
    149160              #elif defined OP_PEQ
    150161                 accum += toAppend;
     162              #else
     163                 #error Bad OP_
    151164              #endif
    152165            }
     
    157170    printf("%d,%d,%f,%d,%f\n", concatsPerReset, corpuslen, meanCorpusLen, concatsDone, elapsed);
    158171
     172    // malloc_stats();
     173
    159174    return 0;
    160175}
Note: See TracChangeset for help on using the changeset viewer.