Context Navigation

source: doc/theses/mike_brooks_MMath/benchmarks/string/process-allocn-attrib.py@ 7d02d35

Visit:

Last change on this file since 7d02d35 was 7d02d35, checked in by Mike Brooks <mlbrooks@…>, 4 months ago
Include benchmark changes for data production in string-plot data WIP. Missing from 2410424.
Property mode set to `100644`
File size: 3.7 KB

Line
1	import sys
2	import re
3	import pandas as pd
4	from collections import defaultdict
5
# Classification table: category name -> substrings looked for in a stack.
# Categories are tried top to bottom and the first hit wins, so a more
# general pattern that appears later is never double-counted against a more
# specific one that appears earlier.  Entries commented "overlap" rely on
# that ordering; don't move those too far up.

# Frame chain shared by several patterns below (copy helper calling the
# string_res constructor), spelled once to keep the entries readable.
_CFA_CTOR_CHAIN = (
    "_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;"
    "_X12_constructorFv_S10string_resPKcm__1"
)

CATEGORY_RULES = {
    "text-import": [
        _CFA_CTOR_CHAIN + ";__memmove_ssse3",
        _CFA_CTOR_CHAIN + ";__memcpy_ssse3",
        "helper;__memcpy_ssse3",
        # "strlen"
    ],
    "gc": [
        _CFA_CTOR_CHAIN + ";_X7garbageFv_S9VbyteHeapi__1",
    ],
    "malloc-free": [
        "operator new;_X8doMallocFPv_mj__1",
        "operator new;malloc",
        "_X6doFreeFv_Pv__1",
        "free",
    ],
    "ctor-dtor": [
        "std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create",
        "operator new",  # overlap stl malloc-free
        "operator delete",
        "_X12_constructorFv_S10string_resPKcm__1",  # overlap cfa text import
    ],
}

# Category reported for a stack that matches none of the rules above.
DEFAULT_CATEGORY = "other"
34
def classify_stack(stack):
    """Return the category name for one stack string.

    The categories in CATEGORY_RULES are tried in their declared order; the
    first category owning a pattern that occurs as a substring of *stack* is
    returned.  When no rule matches, a stack whose text ends in
    ``_X6helperFv_i__1`` or ``helper`` is reported as "harness-leaf";
    anything else falls back to DEFAULT_CATEGORY.
    """
    # First category (in table order) with any pattern present in the stack.
    hit = next(
        (
            name
            for name, needles in CATEGORY_RULES.items()
            if any(needle in stack for needle in needles)
        ),
        None,
    )
    if hit is not None:
        return hit

    # No rule applied: check whether the stack ends in the harness helper.
    ends_in_helper = (
        re.search(r"_X6helperFv_i__1$", stack)
        or re.search(r"helper$", stack)
    )
    return "harness-leaf" if ends_in_helper else DEFAULT_CATEGORY
45
46	# def parse_sut_and_size(filename):
47	# # Extract SUT after "perfexp-" and before the next hyphen
48	# sut_match = re.search(r"perfexp-([a-zA-Z0-9]+)", filename)
49	# # Extract SIZE from "corpus-A-B-C.txt", capturing B
50	# size_match = re.search(r"corpus-\d+-(\d+)-\d+\.txt", filename)
51
52	# if not sut_match or not size_match:
53	# print("Error: Could not parse sut or size from filename.")
54	# sys.exit(1)
55
56	# return sut_match.group(1), size_match.group(1)
57
def read_and_aggregate(input_file):
    """Read per-stack sample counts from a file and total them by category.

    Every non-blank line of *input_file* is split on whitespace.  The last
    field is the integer sample count; the remaining fields, re-joined with
    single spaces, form the stack string handed to ``classify_stack``.

    Args:
        input_file: path of the text file to read.

    Returns:
        A ``(category_map, total_samples)`` tuple.  ``category_map`` maps
        category -> 1-based input line number -> sample count, and
        ``total_samples`` is the sum of all counts read.

    Raises:
        ValueError: if the last field of a line is not an integer.  The
            message names the file and line so the bad record can be found.
    """
    category_map = defaultdict(lambda: defaultdict(int))  # category -> lineno -> sample_count
    total_samples = 0

    with open(input_file) as f:
        for lineno, raw_line in enumerate(f, 1):
            fields = raw_line.split()
            if not fields:
                # Blank (or whitespace-only) line: nothing to count.
                continue
            *stack_parts, count_str = fields
            try:
                count = int(count_str)
            except ValueError as err:
                # Same exception type as before, but with enough context to
                # locate the offending record.
                raise ValueError(
                    f"{input_file}:{lineno}: expected an integer sample count "
                    f"as the last field, got {count_str!r}"
                ) from err
            stack = ' '.join(stack_parts)
            category = classify_stack(stack)
            category_map[category][lineno] += count
            total_samples += count

    return category_map, total_samples
75
def flatten(category_map, total_samples):  # , sut, size):
    """Turn the per-category aggregation into a DataFrame, one row per category.

    Args:
        category_map: mapping of category -> {line number: sample count}.
        total_samples: sum of all sample counts; used as the denominator of
            ``fraction`` (which is 0.0 when this is zero).

    Returns:
        A pandas DataFrame with the columns ``category``,
        ``samples_in_category``, ``total_samples``, ``fraction`` and
        ``sources``.  ``sources`` lists the contributing ``lineno:count``
        pairs, separated by a backslash followed by a pipe character.
    """
    # Written as an escaped backslash: the former "\|" spelling is an invalid
    # escape sequence that newer Python versions warn about, while yielding
    # these same two characters.
    # NOTE(review): confirm a backslash-pipe (not a bare pipe) is what the
    # downstream consumer expects.
    separator = "\\|"

    rows = []
    for category, source_map in category_map.items():
        samples_in_category = sum(source_map.values())
        sources = separator.join(
            f"{lineno}:{count}" for lineno, count in source_map.items()
        )
        fraction = samples_in_category / total_samples if total_samples else 0.0
        rows.append({
            # "sut": sut,
            # "size": size,
            "category": category,
            "samples_in_category": samples_in_category,
            "total_samples": total_samples,
            "fraction": fraction,
            "sources": sources,
        })
    return pd.DataFrame(rows)
92
def main():
    """Command-line entry point: classify one input file and print a TSV.

    Expects exactly one argument, the input file path.  The aggregated
    result is written to stdout as tab-separated rows with no header and no
    index column.  On a wrong argument count the usage text goes to stderr
    and the process exits with status 1.
    """
    if len(sys.argv) != 2:
        # sys.exit() with a string prints it to stderr and exits with
        # status 1, keeping stdout clean for the tab-separated data.
        sys.exit("Usage: python3 process-allocn-attrib.py <input_file>")

    input_file = sys.argv[1]
    # sut, size = parse_sut_and_size(input_file)
    category_map, total_samples = read_and_aggregate(input_file)
    df = flatten(category_map, total_samples)  # , sut, size)

    # Print the result to stdout in tab-separated format
    df.to_csv(sys.stdout, sep="\t", index=False, header=False)


if __name__ == "__main__":
    main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: