source: tests/zombies/string-perf/prog-allocn.cfa @ d344a63

Last change on this file since d344a63 was 08ed947, checked in by Michael Brooks <mlbrooks@…>, 3 years ago

Roll up of string changes for performance testing/improvement, and a couple API features supporting them.

String API changes:
Defining a tuning knob to control the heap growth policy (replaces the former hard-coded 10%, which is downgraded to a default)
Implementing findFrom (allowing find-non-first); leaving find as find-first.

String implementation perf improvements:
Calling C-malloc directly instead of via CFA-alloc.
Replacing loops that copy with memmove calls.
Replacing loops that search for a value with memchr calls.

String perf testing realized:
Makefile supporting several prog-*.cfa, chosen by OPERATION value (implies prog.cfa changes to support the adjusted protocol)
Adjusting the starter/accumulator declarations in PEQ and PTA to behave consistently in cfa v cpp.
Adding tests: allocation, find, normalize, pass-by-val, pass-by-x.
Adding helper shell scripts for: generating flame graphs, collecting/crunching allocation stats using Mubeen's malloc wrappers

  • Property mode set to 100644
File size: 5.8 KB
Line 
1#if defined IMPL_STL_NA_NA
2  #define IMPL_STL
3#endif
4
5#if defined IMPL_BUHR94_NA_NA
6  #define IMPL_BUHR94
7#endif
8
9#if defined IMPL_STL
10  #include <string>
11  #include <iostream>
12  #include <cstdio>
13  using namespace std;
14  #define IMPL_CXX
15
16#elif defined IMPL_CFA_HL_SHARE
17  #define IMPL_CFA_HL
18  #define IMPL_CFA
19
20#elif defined IMPL_CFA_LL_SHARE
21  #define IMPL_CFA_LL
22  #define IMPL_CFA
23
24#elif defined IMPL_CFA_HL_NOSHARE
25  #define IMPL_CFA_HL
26  #define CFA_NOSHARE
27  #define IMPL_CFA
28
29#elif defined IMPL_CFA_LL_NOSHARE
30  #define IMPL_CFA_LL
31  #define CFA_NOSHARE
32  #define IMPL_CFA
33
34#elif defined IMPL_BUHR94
35  #include <iostream>
36  #include <cstdio>
37  #include "/u0/mlbrooks/usys1/sm/string/StringSharing/src/string.h"
38  #define IMPL_CXX
39
40#else
41  #error Bad IMPL_
42#endif
43
44
45#if defined IMPL_CFA_HL
46  #include <string.hfa>
47  extern void TUNING_set_string_heap_liveness_threshold(double);  // in string_res.hfa
48#elif defined IMPL_CFA_LL
49  #include <string_res.hfa>
50#endif
51
52#if defined CFA_NOSHARE
53  #include <string_sharectx.hfa>
54  #define STRING_SHARING_CONTROL \
55    string_sharectx c = { NO_SHARING };
56#else
57  #define STRING_SHARING_CONTROL
58#endif
59
60#if defined IMPL_CFA
61  #include <math.hfa>
62  extern "C" {
63    void malloc_stats( void );
64  }
65#elif defined IMPL_CXX
66  #include <algorithm>
67  using std::min;
68  #include <malloc.h>
69#endif
70
71#include <time.h>
72#include <stdlib.h> // atoi
73#include <string.h> // strlen, only during setup
74
75#if defined IMPL_STL || defined IMPL_BUHR94
76    #define PRINT(s) std::cout << s << std::endl
77#elif defined IMPL_CFA_HL || defined IMPL_CFA_LL
78    #define PRINT(s) sout | s;
79#else
80    #error Unhandled print case
81#endif
82
83#if defined IMPL_CFA_LL
84    #define STRING_T string_res
85    #define ASSIGN_CHAR(str, idx, val) assignAt(str, idx, val)
86#else
87    #define STRING_T string
88    #define ASSIGN_CHAR(str, idx, val) str[idx] = val
89#endif
90
// Mean length, in characters, of the first N NUL-terminated strings in `strings`.
//   N        number of entries of `strings` to examine
//   strings  array of at least N C strings (not dereferenced when N <= 0)
// Returns the arithmetic mean of the strlen of each entry, or 0.0 when N <= 0
// (rather than the NaN that 0/0 would produce).
double meanLen(int N, char ** strings) {
    if (N <= 0) return 0.0;

    // strlen yields size_t; accumulate in the same type to avoid narrowing to int.
    size_t totalLen = 0;
    for (int i = 0; i < N; i++) {
        totalLen += strlen(strings[i]);
    }
    // C-style casts kept on purpose: this file is compiled as both CFA and C++.
    return (double)totalLen / (double)N;
}
98
// Guard flag for MAYBE. It is initialised to 0 and not written anywhere in this
// file; being volatile, every MAYBE must re-read it at run time.
// NOTE(review): presumably this exists to keep otherwise-unused values alive
// under optimisation -- confirm.
volatile int checkthis = 0;
#define MAYBE( op ) if (checkthis) { op; }

// Corpus of input strings (main points these at the tail of argv) and the
// index of the entry most recently handed out by helper.
int corpuslen = 0;
char ** corpus = (char**) 0;
size_t corpus_next_pos = 0;

// Requested (possibly fractional) number of child calls per recursion level,
// and the running fractional carry used to turn it into whole call counts.
double repsPerLevel;
double repBalance = 0.0000001;

// Timing, in clock() ticks: experiment start, time after which the run may
// stop, and the time at which it actually stopped.
clock_t start;
clock_t endTarget;
clock_t end_actual;

// Stopping is only evaluated when (allocationCountActual+1) is a multiple of
// this value.
size_t allocationCountTarget = 0;

// Number of strings constructed by helper so far.
size_t allocationCountActual = 0;
//size_t allocationBytesActual = 0;
114
115void helper( int depth ) {
116
117    if (depth == 0) return;
118
119    corpus_next_pos += 1;
120    corpus_next_pos %= corpuslen;
121
122    STRING_T q = corpus[corpus_next_pos];
123//    ASSIGN_CHAR(q, 0, '@');    // Turns out my implementation is slow at this step.  A separate test could work it.  It's inessential to the allocation test, given the assumption that both string reps allocate eagerly in their heaps from a constant.  In the STL, that assumption is upheld by my observation that commenting out this line didn't speed it up.  In CFA-share, I know it to be true of the implementation.
124
125    allocationCountActual += 1;
126//    allocationBytesActual += q`len;
127
128//    if (depth > 0) {
129
130        repBalance += repsPerLevel;
131        int curRepLimit = repBalance;
132        repBalance -= curRepLimit;
133
134        for ( int i = 0 ;  i < curRepLimit;  i++ ) {
135            if ((allocationCountActual+1) % allocationCountTarget == 0 && clock() > endTarget) return;
136            helper(depth-1);
137        }
138//    }
139
140    MAYBE(PRINT(q));
141}
142
143int main( int argc, char ** argv ) {
144
145    STRING_SHARING_CONTROL
146
147
148    const char * usage_args[] = {"Depth RepsPerLevel ExpansionThreshold    ExecTimeSecs   Corpus...",
149                                 "Depth RepsPerLevel ExpansionThreshold -w WorkAllocCount Corpus..."};
150    const int static_arg_posns = 5;
151    int used_arg_posns = static_arg_posns;
152
153    int launchDepth = -1;
154    double expansionThreshold = -1.0;
155    int execTimeSecs = -1;
156
157    switch (min(argc, static_arg_posns)) {
158      case 5: if ( strcmp(argv[4], "-w") == 0 ) {
159                used_arg_posns ++;
160                allocationCountTarget = atoi(argv[5]);
161              } else {
162                execTimeSecs = atoi(argv[4]);
163              }
164      case 4: expansionThreshold = atof(argv[3]);
165      case 3: repsPerLevel = atof(argv[2]);
166      case 2: launchDepth = atoi(argv[1]);
167    }
168
169    corpuslen = argc - used_arg_posns;
170    corpus = argv + used_arg_posns;
171
172    if (launchDepth < 1 || repsPerLevel < 1.0 || (execTimeSecs < 1 && allocationCountTarget < 1) || corpuslen < 1 ||
173        (expansionThreshold != -1.0 && (expansionThreshold <= 0.0 || expansionThreshold >= 1.0))) {
174      for (int u = 0; u < sizeof(usage_args) / sizeof(*usage_args); u++) {
175        printf("usage: %s %s\n", argv[0], usage_args[u]);
176      }
177      printf("output:\nxxx,corpusItemCount,corpusMeanLenChars,allocationCountActual,execTimeActualSec,topIters\n");
178      exit(1);
179    }
180
181    if (expansionThreshold != -1.0 ) {
182      #if defined IMPL_CFA
183      TUNING_set_string_heap_liveness_threshold(expansionThreshold);
184      #else
185      printf("cannot set expansion threshold on non-CFA implementation");
186      exit(1);
187      #endif
188    }
189
190    double meanCorpusLen = meanLen(corpuslen, corpus);
191
192    // time driven experiment: re-check time every 10000 allocations
193    if (execTimeSecs > 0) allocationCountTarget = 10000;
194
195    start = clock();
196    endTarget = start + CLOCKS_PER_SEC * max(0, execTimeSecs);
197
198    size_t top_iters = 0;
199
200    for(;;) {
201            #if defined OP_PALL
202                helper( launchDepth );
203            #else
204                #error Bad OP_
205            #endif
206
207            top_iters++;
208
209            if ((allocationCountActual+1) % allocationCountTarget == 0 && clock() > endTarget) break;
210    }
211    end_actual = clock();
212    double elapsed = ((double) (end_actual - start)) / CLOCKS_PER_SEC;
213    printf("xxx,%d,%f,%ld,%f,%ld\n", corpuslen, meanCorpusLen, allocationCountActual, elapsed, top_iters);
214
215    // malloc_stats();
216
217    return 0;
218}
Note: See TracBrowser for help on using the repository browser.