import sys
import os
from collections import Counter
import math
import statistics

def load_string_lengths(filename):
    """Return the length of every line in *filename*.

    The file is read as UTF-8 text. Only the trailing newline of each
    line is excluded from its length; any other whitespace is counted.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one integer per line, in file order.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        # Lazily drop the line terminator, then measure what is left.
        without_newline = (raw.rstrip('\n') for raw in handle)
        return list(map(len, without_newline))

def compute_histogram(lengths, bucket_size):
    """Group string lengths into fixed-width buckets and count each bucket.

    Length 0 gets its own bucket keyed by 0. Every other length falls in
    a bucket keyed by its first length: 1, bucket_size + 1,
    2 * bucket_size + 1, and so on, each covering ``bucket_size`` lengths.

    Args:
        lengths: Iterable of integer string lengths.
        bucket_size: Width of each non-zero bucket.

    Returns:
        A dict mapping bucket start to count, in ascending key order.
    """
    tally = Counter(
        # (n - 1) shifts lengths so that 1..bucket_size share one bucket.
        0 if n == 0 else ((n - 1) // bucket_size) * bucket_size + 1
        for n in lengths
    )
    return {start: tally[start] for start in sorted(tally)}

def print_histogram(histogram, bucket_size):
    """Print one ``start–end: count`` row per histogram bucket.

    Rows are printed in the iteration order of *histogram*. The bucket
    keyed by 0 is shown as the range 0–0; every other bucket spans
    ``bucket_size`` consecutive lengths beginning at its key.

    Args:
        histogram: Mapping of bucket start to number of strings.
        bucket_size: Width of each non-zero bucket.
    """
    print("\nHistogram of string lengths:")
    for bucket_start, count in histogram.items():
        if bucket_start == 0:
            bucket_end = 0
        else:
            bucket_end = bucket_start + bucket_size - 1
        # Same field widths for every row, including the zero bucket, so
        # the ':' separators line up in a single column.
        label = f"{bucket_start:>4}–{bucket_end:<4}"
        print(f"{label}: {count}")

def print_stats(lengths):
    """Print count, min, max, mean, median and standard deviation.

    The standard deviation is the sample standard deviation and is shown
    as "N/A" when there are fewer than two values, since it is undefined
    for a single item.

    Args:
        lengths: Non-empty sequence of integer string lengths.
    """
    print("\nStatistical Summary:")
    total = len(lengths)
    print(f"  Count:   {total}")
    smallest = min(lengths)
    print(f"  Min:     {smallest}")
    largest = max(lengths)
    print(f"  Max:     {largest}")
    average = statistics.mean(lengths)
    print(f"  Mean:    {average:.2f}")
    middle = statistics.median(lengths)
    print(f"  Median:  {middle:.2f}")
    if total > 1:
        deviation = statistics.stdev(lengths)
        print(f"  Std Dev: {deviation:.2f}")
    else:
        print("  Std Dev: N/A (only one item)")

def main():
    """Command-line entry point: summarise the line lengths of a text file.

    Expects exactly two arguments in ``sys.argv``: the path of the file
    to analyse and a positive integer bucket size. Prints a statistical
    summary followed by a histogram of line lengths.

    Exits with status 1 (message on stderr) when the arguments are
    invalid, the path is not an existing regular file, or the file cannot
    be read as UTF-8 text. Exits with status 0 when the file has no lines.
    """
    if len(sys.argv) != 3:
        print(
            "Usage: python string_length_summary.py <filename> <bucket_size>",
            file=sys.stderr,
        )
        sys.exit(1)

    filename = sys.argv[1]
    try:
        bucket_size = int(sys.argv[2])
        if bucket_size <= 0:
            raise ValueError
    except ValueError:
        print("Error: Bucket size must be a positive integer.", file=sys.stderr)
        sys.exit(1)

    # isfile() rather than exists(): a directory exists but cannot be
    # opened as a text file, which would otherwise surface as a traceback.
    if not os.path.isfile(filename):
        print(f"File not found: {filename}", file=sys.stderr)
        sys.exit(1)

    try:
        lengths = load_string_lengths(filename)
    except (OSError, UnicodeDecodeError) as exc:
        # Covers permission problems, files removed after the check above,
        # and content that is not valid UTF-8.
        print(f"Error reading {filename}: {exc}", file=sys.stderr)
        sys.exit(1)

    if not lengths:
        print("File is empty or contains no valid lines.")
        sys.exit(0)

    print_stats(lengths)
    histogram = compute_histogram(lengths, bucket_size)
    print_histogram(histogram, bucket_size)

# Run the command-line interface only when this file is executed directly,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
