1 | import sys
|
---|
2 | import os
|
---|
3 | from collections import Counter
|
---|
4 | import math
|
---|
5 | import statistics
|
---|
6 |
|
---|
def load_string_lengths(filename):
    """Return the length of every line in a UTF-8 text file.

    The trailing newline of each line is not counted, so an empty line
    contributes a length of 0.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one integer per line, in file order.
    """
    line_lengths = []
    with open(filename, encoding="utf-8") as handle:
        for raw_line in handle:
            # Drop only the line terminator; other whitespace is content.
            text = raw_line.rstrip("\n")
            line_lengths.append(len(text))
    return line_lengths
10 |
|
---|
def compute_histogram(lengths, bucket_size):
    """Count how many lengths fall into each fixed-width bucket.

    Length 0 gets its own bucket keyed by 0. Positive lengths are grouped
    into ranges of ``bucket_size`` values that start at 1, and each range is
    keyed by its first value (1, 1 + bucket_size, 1 + 2 * bucket_size, ...).

    Args:
        lengths: Iterable of non-negative integer lengths.
        bucket_size: Number of consecutive lengths covered by one bucket.

    Returns:
        A dict mapping bucket start to count, with keys in ascending order.
    """

    def start_of(value):
        # Shift by one so that 1..bucket_size share the first bucket.
        if value == 0:
            return 0
        return ((value - 1) // bucket_size) * bucket_size + 1

    tally = Counter(start_of(value) for value in lengths)
    return {start: tally[start] for start in sorted(tally)}
20 |
|
---|
def print_histogram(histogram, bucket_size):
    """Print one aligned ``start–end: count`` row per histogram bucket.

    Args:
        histogram: Mapping of bucket start to count. Rows are printed in the
            mapping's iteration order. Key 0 is treated as the bucket that
            holds only length 0.
        bucket_size: Width of every non-zero bucket; used to derive the
            inclusive end of each printed range.
    """
    print("\nHistogram of string lengths:")
    for bucket_start, count in histogram.items():
        # The zero bucket covers only length 0, so its range is 0–0; every
        # other bucket spans bucket_size consecutive lengths.
        if bucket_start == 0:
            bucket_end = 0
        else:
            bucket_end = bucket_start + bucket_size - 1
        # Use the same field widths for every row (including the zero
        # bucket) so the counts line up in a single column.
        label = f"{bucket_start:>4}–{bucket_end:<4}"
        print(f"{label}: {count}")
31 |
|
---|
def print_stats(lengths):
    """Print count, min, max, mean, median and standard deviation.

    The sample standard deviation needs at least two values, so a
    placeholder line is printed instead when only one length is given.

    Args:
        lengths: Non-empty sequence of numeric lengths.
    """
    print("\nStatistical Summary:")

    total = len(lengths)
    print(f" Count: {total}")

    smallest = min(lengths)
    print(f" Min: {smallest}")

    largest = max(lengths)
    print(f" Max: {largest}")

    average = statistics.mean(lengths)
    print(f" Mean: {average:.2f}")

    middle = statistics.median(lengths)
    print(f" Median: {middle:.2f}")

    if total > 1:
        spread = statistics.stdev(lengths)
        print(f" Std Dev: {spread:.2f}")
    else:
        print(" Std Dev: N/A (only one item)")
40 |
|
---|
def main():
    """Command-line entry point: summarize the line lengths of a text file.

    Expects exactly two arguments in ``sys.argv``: the file to read and a
    positive integer bucket size. Prints the statistical summary followed
    by the histogram.

    Exits with status 1 on a usage error, an invalid bucket size, or a file
    that cannot be read. Exits with status 0, after printing a notice, when
    the file yields no lines.
    """
    if len(sys.argv) != 3:
        print("Usage: python string_length_summary.py <filename> <bucket_size>")
        sys.exit(1)

    filename = sys.argv[1]

    try:
        bucket_size = int(sys.argv[2])
    except ValueError:
        # Non-numeric input is rejected by the same check as non-positive.
        bucket_size = 0
    if bucket_size <= 0:
        print("Error: Bucket size must be a positive integer.")
        sys.exit(1)

    # Open the file directly instead of pre-checking with os.path.exists():
    # the pre-check accepts directories and can race with deletion, and
    # either case would otherwise end in an unhandled traceback.
    try:
        lengths = load_string_lengths(filename)
    except FileNotFoundError:
        print(f"File not found: {filename}")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # Covers directories, permission problems and non-UTF-8 content.
        print(f"Error: could not read {filename}: {exc}")
        sys.exit(1)

    if not lengths:
        print("File is empty or contains no valid lines.")
        sys.exit(0)

    print_stats(lengths)
    histogram = compute_histogram(lengths, bucket_size)
    print_histogram(histogram, bucket_size)
68 |
|
---|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()