| [e0350e0] | 1 | import sys | 
|---|
|  | 2 | import os | 
|---|
|  | 3 | from collections import Counter | 
|---|
|  | 4 | import math | 
|---|
|  | 5 | import statistics | 
|---|
|  | 6 |  | 
|---|
def load_string_lengths(filename):
    """Return the length of each line of *filename*, in file order.

    The file is opened in text mode and decoded as UTF-8. The trailing
    newline of a line is stripped before measuring, so a blank line
    contributes a length of 0.
    """
    line_lengths = []
    with open(filename, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            stripped = raw_line.rstrip('\n')
            line_lengths.append(len(stripped))
    return line_lengths
|---|
|  | 10 |  | 
|---|
def compute_histogram(lengths, bucket_size):
    """Count how many lengths fall into each bucket.

    A length of 0 is counted under the key 0. Any other length is counted
    under the first value of its bucket, where buckets are
    ``1..bucket_size``, ``bucket_size + 1..2 * bucket_size`` and so on.

    Returns a dict mapping bucket start -> count, with keys in ascending
    order.
    """
    def first_of_bucket(n):
        # Shift by one so that lengths 1..bucket_size share the bucket
        # starting at 1.
        return 0 if n == 0 else ((n - 1) // bucket_size) * bucket_size + 1

    tally = Counter(first_of_bucket(n) for n in lengths)
    return {start: tally[start] for start in sorted(tally)}
|---|
|  | 20 |  | 
|---|
def print_histogram(histogram, bucket_size):
    """Print one ``start–end: count`` line per bucket under a heading.

    *histogram* maps each bucket's starting length to its count and is
    printed in the mapping's iteration order. *bucket_size* is used to
    derive the inclusive end of every non-zero bucket.

    Every label is rendered with the same field widths (start right-aligned
    in 4 columns, end left-aligned in 4 columns), so the colon lines up for
    all rows, including the zero bucket.
    """
    print("\nHistogram of string lengths:")
    for bucket_start, count in histogram.items():
        if bucket_start == 0:
            # The zero bucket only ever holds length 0, so its range is 0–0.
            bucket_end = 0
        else:
            bucket_end = bucket_start + bucket_size - 1
        label = f"{bucket_start:>4}–{bucket_end:<4}"
        print(f"{label}: {count}")
|---|
|  | 31 |  | 
|---|
def print_stats(lengths):
    """Print count, min, max, mean, median and standard deviation.

    Mean, median and standard deviation are shown with two decimals. The
    sample standard deviation needs at least two values, so a single-item
    input prints an "N/A" line instead. *lengths* is expected to be
    non-empty: ``min()`` raises ``ValueError`` on an empty sequence.
    """
    print("\nStatistical Summary:")

    total = len(lengths)
    print(f"  Count:   {total}")

    shortest = min(lengths)
    print(f"  Min:     {shortest}")

    longest = max(lengths)
    print(f"  Max:     {longest}")

    average = statistics.mean(lengths)
    print(f"  Mean:    {average:.2f}")

    middle = statistics.median(lengths)
    print(f"  Median:  {middle:.2f}")

    if total > 1:
        spread = statistics.stdev(lengths)
        print(f"  Std Dev: {spread:.2f}")
    else:
        print("  Std Dev: N/A (only one item)")
|---|
|  | 40 |  | 
|---|
def main():
    """Command-line entry point: summarise the line lengths of a text file.

    Expects exactly two arguments in ``sys.argv``: the file to read and a
    positive integer bucket size. Prints the statistical summary followed
    by the histogram.

    Exits with status 1 on a wrong argument count, an invalid bucket size,
    or a file that cannot be read; exits with status 0 (after a notice)
    when the file yields no lines.
    """
    if len(sys.argv) != 3:
        print("Usage: python string_length_summary.py <filename> <bucket_size>")
        sys.exit(1)

    filename = sys.argv[1]

    try:
        bucket_size = int(sys.argv[2])
    except ValueError:
        bucket_size = None
    if bucket_size is None or bucket_size <= 0:
        print("Error: Bucket size must be a positive integer.")
        sys.exit(1)

    # Attempt the read and handle failure, rather than checking existence
    # first: a pre-check is racy and lets directories, unreadable files and
    # non-UTF-8 content through to an uncaught traceback.
    try:
        lengths = load_string_lengths(filename)
    except FileNotFoundError:
        print(f"File not found: {filename}")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error reading {filename}: {exc}")
        sys.exit(1)

    if not lengths:
        print("File is empty or contains no valid lines.")
        sys.exit(0)

    print_stats(lengths)
    histogram = compute_histogram(lengths, bucket_size)
    print_histogram(histogram, bucket_size)
|---|
|  | 68 |  | 
|---|
# Run the command-line interface only when this file is executed as a
# script, so importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()
|---|