Context Navigation

source: doc/theses/mike_brooks_MMath/benchmarks/string/process-allocn-attrib.py@ 7d02d35

Visit:

Last change on this file since 7d02d35 was 7d02d35, checked in by Mike Brooks <mlbrooks@…>, 4 months ago
Include benchmark changes for data production in string-plot data WIP. Missing from 2410424.
Property mode set to `100644`
File size: 3.7 KB

Rev	Line
[7d02d35]	1	import sys
	2	import re
	3	import pandas as pd
	4	from collections import defaultdict
	5
	6	# Matched top to bottom, bailing on first match
	7	# More general rules are not double-counted if they occur later
	8	# Such cases are commented "overlap"; don't move those too far up
	9	CATEGORY_RULES = {
	10	"text-import": [
	11	"_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;_X12_constructorFv_S10string_resPKcm__1;__memmove_ssse3",
	12	"_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;_X12_constructorFv_S10string_resPKcm__1;__memcpy_ssse3",
	13	"helper;__memcpy_ssse3",
	14	# "strlen"
	15	],
	16	"gc": [
	17	"_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;_X12_constructorFv_S10string_resPKcm__1;_X7garbageFv_S9VbyteHeapi__1"
	18	],
	19	"malloc-free": [
	20	"operator new;_X8doMallocFPv_mj__1",
	21	"operator new;malloc",
	22	"_X6doFreeFv_Pv__1",
	23	"free"
	24	],
	25	"ctor-dtor": [
	26	"std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create",
	27	"operator new", # overlap stl malloc-free
	28	"operator delete",
	29	"_X12_constructorFv_S10string_resPKcm__1" # overlap cfa text import
	30	]
	31	}
	32
	33	DEFAULT_CATEGORY = "other"
	34
	35	def classify_stack(stack):
	36	for category, patterns in CATEGORY_RULES.items():
	37	for pattern in patterns:
	38	if pattern in stack:
	39	return category
	40	if re.search(r"_X6helperFv_i__1$", stack):
	41	return "harness-leaf"
	42	if re.search(r"helper$", stack):
	43	return "harness-leaf"
	44	return DEFAULT_CATEGORY
	45
	46	# def parse_sut_and_size(filename):
	47	# # Extract SUT after "perfexp-" and before the next hyphen
	48	# sut_match = re.search(r"perfexp-([a-zA-Z0-9]+)", filename)
	49	# # Extract SIZE from "corpus-A-B-C.txt", capturing B
	50	# size_match = re.search(r"corpus-\d+-(\d+)-\d+\.txt", filename)
	51
	52	# if not sut_match or not size_match:
	53	# print("Error: Could not parse sut or size from filename.")
	54	# sys.exit(1)
	55
	56	# return sut_match.group(1), size_match.group(1)
	57
	58	def read_and_aggregate(input_file):
	59	category_map = defaultdict(lambda: defaultdict(int)) # category -> lineno -> sample_count
	60	total_samples = 0
	61
	62	with open(input_file) as f:
	63	for lineno, line in enumerate(f, 1):
	64	line = line.strip()
	65	if not line:
	66	continue
	67	*stack_parts, count_str = line.split()
	68	count = int(count_str)
	69	stack = ' '.join(stack_parts)
	70	category = classify_stack(stack)
	71	category_map[category][lineno] += count
	72	total_samples += count
	73
	74	return category_map, total_samples
	75
	76	def flatten(category_map, total_samples): #, sut, size):
	77	rows = []
	78	for category, source_map in category_map.items():
	79	samples_in_category = sum(source_map.values())
	80	sources = "\|".join(f"{lineno}:{count}" for lineno, count in source_map.items())
	81	fraction = samples_in_category / total_samples if total_samples else 0.0
	82	rows.append({
	83	# "sut": sut,
	84	# "size": size,
	85	"category": category,
	86	"samples_in_category": samples_in_category,
	87	"total_samples": total_samples,
	88	"fraction": fraction,
	89	"sources": sources
	90	})
	91	return pd.DataFrame(rows)
	92
	93	def main():
	94	if len(sys.argv) != 2:
	95	print("Usage: python3 process-allocn-attrib.py <input_file>")
	96	sys.exit(1)
	97
	98	input_file = sys.argv[1]
	99	# sut, size = parse_sut_and_size(input_file)
	100	category_map, total_samples = read_and_aggregate(input_file)
	101	df = flatten(category_map, total_samples) #, sut, size)
	102
	103	# Print the result to stdout in tab-separated format
	104	df.to_csv(sys.stdout, sep="\t", index=False, header=False)
	105
	106	if __name__ == "__main__":
	107	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: