source: doc/theses/mike_brooks_MMath/benchmarks/string/process-allocn-attrib.py@ 7d02d35

Last change on this file since 7d02d35 was 7d02d35, checked in by Mike Brooks <mlbrooks@…>, 4 months ago

Include benchmark changes for data production in string-plot data WIP. Missing from 2410424.

  • Property mode set to 100644
File size: 3.7 KB
Line 
1import sys
2import re
3import pandas as pd
4from collections import defaultdict
5
# Maps each reporting category to the substrings that identify it.
# classify_stack tests every pattern with `pattern in stack`, walking the
# categories in the order they are written here (dict insertion order).
#
# Matched top to bottom, bailing on first match
# More general rules are not double-counted if they occur later
# Such cases are commented "overlap"; don't move those too far up
CATEGORY_RULES = {
    "text-import": [
        "_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;_X12_constructorFv_S10string_resPKcm__1;__memmove_ssse3",
        "_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;_X12_constructorFv_S10string_resPKcm__1;__memcpy_ssse3",
        "helper;__memcpy_ssse3",
        # "strlen"  (disabled rule, kept for reference)
    ],
    "gc": [
        "_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;_X12_constructorFv_S10string_resPKcm__1;_X7garbageFv_S9VbyteHeapi__1"
    ],
    "malloc-free": [
        "operator new;_X8doMallocFPv_mj__1",
        "operator new;malloc",
        "_X6doFreeFv_Pv__1",
        "free"
    ],
    "ctor-dtor": [
        "std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create",
        "operator new",  # overlap stl malloc-free
        "operator delete",
        "_X12_constructorFv_S10string_resPKcm__1"  # overlap cfa text import
    ]
}

# Category returned by classify_stack when neither a rule above nor one of
# its harness-leaf patterns matches the stack.
DEFAULT_CATEGORY = "other"
34
def classify_stack(stack, rules=None, default=None):
    """Return the category name for one stack string.

    Args:
        stack: The stack text of one input record (frames as written in the
            input file, without the trailing sample count).
        rules: Optional mapping of category name -> list of substring
            patterns. Categories are tried in the mapping's iteration order
            and the first category with any pattern contained in ``stack``
            wins. Defaults to the module-level ``CATEGORY_RULES``.
        default: Category returned when nothing matches. Defaults to the
            module-level ``DEFAULT_CATEGORY``.

    Returns:
        The first matching category from ``rules``; otherwise
        ``"harness-leaf"`` when the stack ends in one of the harness helper
        names; otherwise ``default``.
    """
    # Resolve the defaults at call time so the module constants can be
    # defined (or replaced) independently of this function.
    if rules is None:
        rules = CATEGORY_RULES
    if default is None:
        default = DEFAULT_CATEGORY

    # First match wins: an earlier, more specific category shadows a later,
    # more general one that would also match the same stack.
    for category, patterns in rules.items():
        if any(pattern in stack for pattern in patterns):
            return category

    # Only reached when no rule matched: a stack whose last frame is the
    # harness helper itself (mangled or plain spelling).
    if re.search(r"_X6helperFv_i__1$", stack) or re.search(r"helper$", stack):
        return "harness-leaf"
    return default
45
46# def parse_sut_and_size(filename):
47# # Extract SUT after "perfexp-" and before the next hyphen
48# sut_match = re.search(r"perfexp-([a-zA-Z0-9]+)", filename)
49# # Extract SIZE from "corpus-A-B-C.txt", capturing B
50# size_match = re.search(r"corpus-\d+-(\d+)-\d+\.txt", filename)
51
52# if not sut_match or not size_match:
53# print("Error: Could not parse sut or size from filename.")
54# sys.exit(1)
55
56# return sut_match.group(1), size_match.group(1)
57
def read_and_aggregate(input_file):
    """Read a stack-sample file and total its sample counts per category.

    Each non-blank line is split on whitespace; the last field is the
    integer sample count and the preceding fields, re-joined with single
    spaces, form the stack string passed to ``classify_stack``.

    Args:
        input_file: Path of the file to read.

    Returns:
        A tuple ``(category_map, total_samples)`` where ``category_map`` maps
        category -> input line number (1-based) -> sample count, and
        ``total_samples`` is the sum of all counts read.

    Raises:
        ValueError: If the last field of a non-blank line is not an integer.
            The message names the file, the line number and the bad field.
        OSError: If the file cannot be opened.
    """
    category_map = defaultdict(lambda: defaultdict(int))  # category -> lineno -> sample_count
    total_samples = 0

    with open(input_file) as f:
        for lineno, raw_line in enumerate(f, 1):
            fields = raw_line.split()
            if not fields:
                # Blank or whitespace-only line.
                continue
            *stack_parts, count_str = fields
            try:
                count = int(count_str)
            except ValueError as err:
                # Same exception type as before, but say where the bad
                # record is instead of surfacing a bare int() failure.
                raise ValueError(
                    f"{input_file}:{lineno}: last field must be an integer "
                    f"sample count, got {count_str!r}"
                ) from err
            stack = ' '.join(stack_parts)
            category = classify_stack(stack)
            category_map[category][lineno] += count
            total_samples += count

    return category_map, total_samples
75
def flatten(category_map, total_samples):
    """Turn the per-category aggregation into one DataFrame row per category.

    Args:
        category_map: Mapping of category -> (input line number -> sample
            count), as produced by ``read_and_aggregate``.
        total_samples: Sum of all sample counts; the denominator of
            ``fraction``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``category``,
        ``samples_in_category``, ``total_samples``, ``fraction`` and
        ``sources``, in that order. ``sources`` is a ``|``-joined list of
        ``lineno:count`` pairs. The columns are present even when
        ``category_map`` is empty, so callers can rely on the schema.
    """
    # NOTE: the "sut" and "size" columns are currently disabled; they would
    # be the first two columns if re-enabled.
    columns = ["category", "samples_in_category", "total_samples", "fraction", "sources"]

    rows = []
    for category, source_map in category_map.items():
        samples_in_category = sum(source_map.values())
        sources = "|".join(f"{lineno}:{count}" for lineno, count in source_map.items())
        # Guard the division so an all-zero (or empty) input yields 0.0.
        fraction = samples_in_category / total_samples if total_samples else 0.0
        rows.append({
            "category": category,
            "samples_in_category": samples_in_category,
            "total_samples": total_samples,
            "fraction": fraction,
            "sources": sources,
        })

    # Pin the schema explicitly: without it an empty input produces a frame
    # with no columns at all.
    return pd.DataFrame(rows, columns=columns)
92
def main():
    """Command-line entry point: classify one input file and print a TSV.

    Expects exactly one argument, the input file path. Writes one
    tab-separated row per category to stdout, without header or index.
    On a wrong argument count, prints the usage line to stderr and exits
    with status 1.
    """
    if len(sys.argv) != 2:
        # stdout carries the TSV data, so diagnostics must go to stderr or
        # they end up inside redirected/piped output.
        print("Usage: python3 process-allocn-attrib.py <input_file>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    # sut, size = parse_sut_and_size(input_file)  (currently disabled)
    category_map, total_samples = read_and_aggregate(input_file)
    df = flatten(category_map, total_samples)

    # Print the result to stdout in tab-separated format
    df.to_csv(sys.stdout, sep="\t", index=False, header=False)
105
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.