[e0350e0] | 1 | import sys
|
---|
| 2 | import os
|
---|
| 3 | from collections import Counter
|
---|
| 4 | import math
|
---|
| 5 | import statistics
|
---|
| 6 |
|
---|
def load_string_lengths(filename):
    """Return a list with the length of each line in a UTF-8 text file.

    The trailing newline of every line is stripped before measuring, so a
    blank line contributes a length of 0.
    """
    line_lengths = []
    with open(filename, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            stripped = raw_line.rstrip('\n')
            line_lengths.append(len(stripped))
    return line_lengths
| 10 |
|
---|
def compute_histogram(lengths, bucket_size):
    """Count how many lengths fall into each fixed-width bucket.

    A length of 0 gets its own bucket keyed by 0. Every other length is
    keyed by the first value of its bucket: 1, 1 + bucket_size,
    1 + 2 * bucket_size, and so on.

    Returns a plain dict mapping bucket start -> count, ordered by
    ascending bucket start.
    """
    def first_value_of_bucket(n):
        # Zero is special-cased so that positive buckets start at 1.
        if n == 0:
            return 0
        return (n - 1) // bucket_size * bucket_size + 1

    tallies = Counter(first_value_of_bucket(n) for n in lengths)
    return {start: tallies[start] for start in sorted(tallies)}
| 20 |
|
---|
def print_histogram(histogram, bucket_size):
    """Print a header and one "start–end: count" line per bucket.

    Buckets are printed in the mapping's iteration order. The bucket keyed
    by 0 is labelled as the fixed range 0–0; every other bucket spans
    bucket_size consecutive values beginning at its key.
    """
    print("\nHistogram of string lengths:")
    for low, tally in histogram.items():
        high = low + bucket_size - 1
        label = " 0–0" if low == 0 else f"{low:>4}–{high:<4}"
        print(f"{label}: {tally}")
| 31 |
|
---|
def print_stats(lengths):
    """Print count, min, max, mean, median and standard deviation.

    The standard deviation is the sample value from statistics.stdev and is
    only computed when there is more than one length; otherwise an "N/A"
    line is printed instead. Each figure is printed as soon as it is
    computed.
    """
    print("\nStatistical Summary:")

    total = len(lengths)
    print(f" Count: {total}")

    smallest = min(lengths)
    print(f" Min: {smallest}")

    largest = max(lengths)
    print(f" Max: {largest}")

    average = statistics.mean(lengths)
    print(f" Mean: {average:.2f}")

    middle = statistics.median(lengths)
    print(f" Median: {middle:.2f}")

    if total > 1:
        spread_line = f" Std Dev: {statistics.stdev(lengths):.2f}"
    else:
        spread_line = " Std Dev: N/A (only one item)"
    print(spread_line)
| 40 |
|
---|
def main():
    """Command-line entry point: summarize the line lengths of a text file.

    Expects exactly two arguments in ``sys.argv``: the input filename and a
    positive integer bucket size. On success prints the statistical summary
    followed by the histogram.

    Exits with status 1 on wrong usage, an invalid bucket size, a missing
    path, a directory path, or a file that cannot be read or decoded as
    UTF-8. Exits with status 0 when the file contains no lines.
    """
    if len(sys.argv) != 3:
        print("Usage: python string_length_summary.py <filename> <bucket_size>")
        sys.exit(1)

    filename = sys.argv[1]
    try:
        bucket_size = int(sys.argv[2])
        if bucket_size <= 0:
            # Route non-positive sizes through the same error path as
            # non-numeric input.
            raise ValueError
    except ValueError:
        print("Error: Bucket size must be a positive integer.")
        sys.exit(1)

    if not os.path.exists(filename):
        print(f"File not found: {filename}")
        sys.exit(1)

    # A directory passes the existence check but cannot be opened as a text
    # file; report it instead of crashing inside open().
    if os.path.isdir(filename):
        print(f"Not a file: {filename}")
        sys.exit(1)

    try:
        lengths = load_string_lengths(filename)
    except (OSError, UnicodeDecodeError) as exc:
        # Covers permission problems, the file vanishing after the check
        # above, and content that is not valid UTF-8.
        print(f"Error reading {filename}: {exc}")
        sys.exit(1)

    if not lengths:
        print("File is empty or contains no valid lines.")
        sys.exit(0)

    print_stats(lengths)
    histogram = compute_histogram(lengths, bucket_size)
    print_histogram(histogram, bucket_size)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()