source: doc/theses/mike_brooks_MMath/benchmarks/string/string-length-summary.py@ 6fe4a7f

Last change on this file since 6fe4a7f was e0350e0, checked in by Michael Brooks <mlbrooks@…>, 4 months ago

Recent rework of string benchmarks

  • Property mode set to 100644
File size: 2.1 KB
Line 
1import sys
2import os
3from collections import Counter
4import math
5import statistics
6
def load_string_lengths(filename):
    """Return the length of every line in *filename*, in file order.

    The file is read as UTF-8 text. Each line's trailing newline is stripped
    before measuring, so a blank line contributes a length of 0. Lengths are
    counted in characters (``len`` of the decoded ``str``), not bytes.
    """
    sizes = []
    with open(filename, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.rstrip('\n')
            sizes.append(len(text))
    return sizes
10
def compute_histogram(lengths, bucket_size):
    """Count how many lengths fall into each fixed-width bucket.

    Length 0 gets its own bucket keyed by 0. Every other length is keyed by
    the first length of its bucket: 1, 1 + bucket_size, 1 + 2 * bucket_size,
    and so on. The result is a plain dict whose keys are in ascending order.
    """
    def bucket_start(n):
        # Zero is kept apart so the remaining buckets begin at 1.
        if n == 0:
            return 0
        return (n - 1) // bucket_size * bucket_size + 1

    tally = Counter(bucket_start(n) for n in lengths)
    return {start: tally[start] for start in sorted(tally)}
20
def print_histogram(histogram, bucket_size):
    """Print one ``start–end: count`` row per histogram bucket.

    Args:
        histogram: Mapping from bucket start to the number of strings in
            that bucket. Rows are printed in the mapping's iteration order.
            Key 0 is the bucket that holds only zero-length strings.
        bucket_size: Width of every non-zero bucket; a bucket starting at
            ``s`` covers lengths ``s`` through ``s + bucket_size - 1``.
    """
    print("\nHistogram of string lengths:")
    for bucket_start, count in histogram.items():
        # The zero bucket covers exactly one length, so its range is 0–0.
        bucket_end = 0 if bucket_start == 0 else bucket_start + bucket_size - 1
        # The en dash keeps the two bounds apart; without it a 1..5 bucket
        # would print as "15". Using the same widths for every row (the zero
        # bucket included) keeps the columns aligned.
        label = f"{bucket_start:>4}–{bucket_end:<4}"
        print(f"{label}: {count}")
31
def print_stats(lengths):
    """Print count, min, max, mean, median and standard deviation of *lengths*.

    *lengths* must be non-empty: ``min`` raises ``ValueError`` on an empty
    sequence. The sample standard deviation needs at least two values, so a
    single-item input reports "N/A" for it instead.
    """
    print("\nStatistical Summary:")

    total = len(lengths)
    print(f" Count: {total}")

    smallest = min(lengths)
    print(f" Min: {smallest}")

    largest = max(lengths)
    print(f" Max: {largest}")

    average = statistics.mean(lengths)
    print(f" Mean: {average:.2f}")

    middle = statistics.median(lengths)
    print(f" Median: {middle:.2f}")

    if total > 1:
        spread = statistics.stdev(lengths)
        print(f" Std Dev: {spread:.2f}")
    else:
        print(" Std Dev: N/A (only one item)")
40
def main():
    """Command-line entry point: summarize the line lengths of a text file.

    Expects exactly two arguments, ``<filename> <bucket_size>``. On success
    it prints the statistical summary followed by the length histogram.

    Exits with status 1 on a usage error, a bucket size that is not a
    positive integer, or a file that is missing or cannot be read; exits
    with status 0 when the file yields no lines.
    """
    if len(sys.argv) != 3:
        print("Usage: python string-length-summary.py <filename> <bucket_size>")
        sys.exit(1)

    filename = sys.argv[1]

    try:
        bucket_size = int(sys.argv[2])
    except ValueError:
        bucket_size = None
    if bucket_size is None or bucket_size <= 0:
        print("Error: Bucket size must be a positive integer.")
        sys.exit(1)

    # Attempt the read and handle failure, rather than pre-checking that the
    # path exists: a directory or an unreadable file passes an existence
    # check and would otherwise end in an uncaught traceback.
    try:
        lengths = load_string_lengths(filename)
    except FileNotFoundError:
        print(f"File not found: {filename}")
        sys.exit(1)
    except OSError as err:
        print(f"Error: cannot read {filename}: {err}")
        sys.exit(1)
    except UnicodeDecodeError as err:
        print(f"Error: {filename} is not valid UTF-8: {err}")
        sys.exit(1)

    if not lengths:
        print("File is empty or contains no valid lines.")
        sys.exit(0)

    print_stats(lengths)
    histogram = compute_histogram(lengths, bucket_size)
    print_histogram(histogram, bucket_size)
68
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.