[7d02d35] | 1 | import sys
|
---|
| 2 | import re
|
---|
| 3 | import pandas as pd
|
---|
| 4 | from collections import defaultdict
|
---|
| 5 |
|
---|
# Categories are tried in the order they are written here and, within a
# category, patterns are tried top to bottom; the first hit wins.  A broad
# pattern that appears later therefore never double-counts a stack already
# claimed by a more specific pattern above it.  Entries tagged "overlap"
# depend on that ordering; don't move those too far up.

# Call-chain prefix shared by several of the rules below.
_EAGER_COPY_CTOR_CHAIN = (
    "_X19eagerCopyCtorHelperFv_S10string_resPKcm__1;"
    "_X12_constructorFv_S10string_resPKcm__1"
)

CATEGORY_RULES = {
    "text-import": [
        _EAGER_COPY_CTOR_CHAIN + ";__memmove_ssse3",
        _EAGER_COPY_CTOR_CHAIN + ";__memcpy_ssse3",
        "helper;__memcpy_ssse3",
        # "strlen"
    ],
    "gc": [
        _EAGER_COPY_CTOR_CHAIN + ";_X7garbageFv_S9VbyteHeapi__1",
    ],
    "malloc-free": [
        "operator new;_X8doMallocFPv_mj__1",
        "operator new;malloc",
        "_X6doFreeFv_Pv__1",
        "free",
    ],
    "ctor-dtor": [
        "std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create",
        "operator new",  # overlap stl malloc-free
        "operator delete",
        "_X12_constructorFv_S10string_resPKcm__1",  # overlap cfa text import
    ],
}

# Label used when no rule matches a stack.
DEFAULT_CATEGORY = "other"
| 34 |
|
---|
# Matches a stack whose final frame is one of the two "helper" spellings.
# Compiled once at import time instead of on every call.
_HARNESS_LEAF_RE = re.compile(r"(?:_X6helperFv_i__1|helper)$")


def classify_stack(stack, *, rules=None, default=None):
    """Return the category label for one call-stack string.

    Categories are tried in the iteration order of ``rules``; the first
    category owning a pattern that occurs as a plain substring of ``stack``
    wins.  If no rule matches and the stack ends in a "helper" frame, the
    stack is labelled ``"harness-leaf"``.  Otherwise the fallback label is
    returned.

    Args:
        stack: The call stack as a single string.
        rules: Optional mapping of category name to an iterable of substring
            patterns.  Defaults to the module-level ``CATEGORY_RULES``.
        default: Optional fallback label.  Defaults to the module-level
            ``DEFAULT_CATEGORY``.

    Returns:
        The category name as a string.
    """
    if rules is None:
        rules = CATEGORY_RULES

    for category, patterns in rules.items():
        if any(pattern in stack for pattern in patterns):
            return category

    # Checked only after every rule has failed, so explicit rules that
    # mention "helper" keep priority over this catch-all.
    if _HARNESS_LEAF_RE.search(stack):
        return "harness-leaf"

    return DEFAULT_CATEGORY if default is None else default
| 45 |
|
---|
| 46 | # def parse_sut_and_size(filename):
|
---|
| 47 | # # Extract SUT after "perfexp-" and before the next hyphen
|
---|
| 48 | # sut_match = re.search(r"perfexp-([a-zA-Z0-9]+)", filename)
|
---|
| 49 | # # Extract SIZE from "corpus-A-B-C.txt", capturing B
|
---|
| 50 | # size_match = re.search(r"corpus-\d+-(\d+)-\d+\.txt", filename)
|
---|
| 51 |
|
---|
| 52 | # if not sut_match or not size_match:
|
---|
| 53 | # print("Error: Could not parse sut or size from filename.")
|
---|
| 54 | # sys.exit(1)
|
---|
| 55 |
|
---|
| 56 | # return sut_match.group(1), size_match.group(1)
|
---|
| 57 |
|
---|
def read_and_aggregate(input_file):
    """Read a stack-sample file and total the samples per category.

    Each non-blank line is split on whitespace.  The last token is the
    integer sample count; the preceding tokens, re-joined with single
    spaces, form the stack string handed to ``classify_stack``.

    Args:
        input_file: Path of the file to read.

    Returns:
        A ``(category_map, total_samples)`` tuple.  ``category_map`` maps
        category -> 1-based input line number -> sample count, and
        ``total_samples`` is the sum of all counts read.

    Raises:
        ValueError: If the last token of a non-blank line is not an integer.
            The message names the file and line number.
        OSError: If the file cannot be opened.
    """
    category_map = defaultdict(lambda: defaultdict(int))  # category -> lineno -> sample_count
    total_samples = 0

    # Explicit encoding keeps the result independent of the locale.
    # Undecodable bytes are replaced rather than aborting the whole run.
    with open(input_file, encoding="utf-8", errors="replace") as f:
        for lineno, line in enumerate(f, 1):
            fields = line.split()
            if not fields:  # blank or whitespace-only line
                continue

            *stack_parts, count_str = fields
            try:
                count = int(count_str)
            except ValueError as err:
                raise ValueError(
                    f"{input_file}:{lineno}: expected a trailing integer "
                    f"sample count, got {count_str!r}"
                ) from err

            stack = " ".join(stack_parts)
            category_map[classify_stack(stack)][lineno] += count
            total_samples += count

    return category_map, total_samples
| 75 |
|
---|
# Output schema of flatten(), in column order.  Passed explicitly to the
# DataFrame constructor so that an empty input still yields these columns.
_FLATTEN_COLUMNS = [
    # "sut" and "size" columns are currently disabled.
    "category",
    "samples_in_category",
    "total_samples",
    "fraction",
    "sources",
]


def flatten(category_map, total_samples):
    """Turn the per-category aggregation into one DataFrame row per category.

    Args:
        category_map: Mapping of category -> (line number -> sample count).
        total_samples: Total sample count across all categories.  It is the
            denominator of ``fraction``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``category``,
        ``samples_in_category``, ``total_samples``, ``fraction`` and
        ``sources``.  Rows follow the iteration order of ``category_map``.
        ``sources`` is a ``|``-joined list of ``lineno:count`` pairs.
        ``fraction`` is ``0.0`` when ``total_samples`` is zero.  An empty
        ``category_map`` gives an empty frame that still has all five
        columns.
    """
    rows = []
    for category, source_map in category_map.items():
        samples_in_category = sum(source_map.values())
        rows.append({
            "category": category,
            "samples_in_category": samples_in_category,
            "total_samples": total_samples,
            # Guard against division by zero when nothing was sampled.
            "fraction": samples_in_category / total_samples if total_samples else 0.0,
            "sources": "|".join(
                f"{lineno}:{count}" for lineno, count in source_map.items()
            ),
        })
    return pd.DataFrame(rows, columns=_FLATTEN_COLUMNS)
| 92 |
|
---|
def main():
    """Command-line entry point: categorise one input file and print a TSV.

    Expects exactly one argument, the input file path.  The aggregated
    per-category table is written to stdout as tab-separated values with no
    header row and no index column.  With any other argument count, a usage
    message is written to stderr and the process exits with status 1.
    """
    if len(sys.argv) != 2:
        # stdout carries the TSV result, so diagnostics go to stderr and
        # cannot end up in a redirected or piped data stream.
        print("Usage: python3 process-allocn-attrib.py <input_file>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    # NOTE: deriving sut/size from the file name is currently disabled.
    category_map, total_samples = read_and_aggregate(input_file)
    df = flatten(category_map, total_samples)

    # Print the result to stdout in tab-separated format
    df.to_csv(sys.stdout, sep="\t", index=False, header=False)


if __name__ == "__main__":
    main()