#!/usr/bin/python3

import argparse
import decimal
import math
import re
import sys

import collections, functools, operator

def parse(reg, lines):
	"""Return the first match of the regex *reg* found in *lines*.

	Lines are tried in order with re.findall() and the search stops at the
	first line that yields a match. The result has the shape re.findall()
	gives for one match: a string when the pattern has zero or one group,
	a tuple of strings when it has several.

	Raises IndexError when no line matches.
	"""
	for line in lines:
		found = re.findall(reg, line)
		if found:
			return found[0]
	# Same exception type an empty result list used to produce, but naming the pattern.
	raise IndexError('no line matches pattern: {}'.format(reg))

def wavg(vals, ws):
	"""Return the mean of *vals* weighted by *ws*.

	Each value is paired with the weight at the same index. When the weights
	sum to zero the result is 0.0 and the values are not looked at.
	"""
	weight_total = sum(ws)
	if weight_total == 0:
		return 0.0
	weighted = sum(val * ws[idx] for idx, val in enumerate(vals))
	return weighted / weight_total

def hist(s):
	"""Convert a whitespace-separated row of integer counts into a dict.

	Keys are the zero-based positions of the counts in *s*; positions whose
	count is zero are left out.
	"""
	counts = map(int, s.split())
	return {pos: count for pos, count in enumerate(counts) if count != 0}

class Result:
	"""Statistics parsed from one httperf report, one dict per report section."""

	def __init__(self):
		# Every section starts out empty; from_file() replaces each dict.
		self.total = {}
		self.connect = {}
		self.request = {}
		self.reply = {}
		self.misc = {}
		self.errors = {}
		self.session = {}

	@staticmethod
	def from_file(file):
		"""Parse an httperf report from *file* and return it as a Result.

		*file* is iterated once to collect its lines, and the first line is
		echoed to stdout before any parsing happens. Each statistic is pulled
		out with parse(), so a report lacking one of the expected lines fails
		inside parse(). Numeric fields are converted with int()/float().
		"""
		res = Result()
		lines = list(file)
		print(lines[0].strip())

		def grab(pattern):
			# First match of pattern anywhere in the report.
			return parse(pattern, lines)

		#------------------------------
		# total
		n_conn, n_req, n_rep, duration = grab(r'^Total: connections ([0-9]+) requests ([0-9]+) replies ([0-9]+) test-duration ([\.0-9]+) s')
		res.total = {
			'connections': int(n_conn),
			'requests': int(n_req),
			'replies': int(n_rep),
			'duration': float(duration),
		}

		#------------------------------
		# connection
		conn_rate, conn_period, conn_peak = grab(r'^Connection rate: ([\.0-9]+) conn/s \(([\.0-9]+) ms/conn, <=([0-9]+) concurrent connections\)')
		conn_timing = grab(r'^Connection time \[ms\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) median ([\.0-9]+) stddev ([\.0-9]+)')
		connect_ms = grab(r'^Connection time \[ms\]: connect ([\.0-9]+)')
		replies_per_conn = grab(r'^Connection length \[replies/conn\]: ([\.0-9]+)')
		res.connect = {
			'rate': {
				'conn/s': float(conn_rate),
				'ms/conn': float(conn_period),
				'<=': int(conn_peak),
			},
			'time': dict(zip(('min', 'avg', 'max', 'median', 'stddev'), map(float, conn_timing))),
			'connect': float(connect_ms),
			'length': float(replies_per_conn),
		}

		#------------------------------
		# request
		req_rate, req_period = grab(r'^Request rate: ([\.0-9]+) req/s \(([\.0-9]+) ms/req\)')
		req_size = grab(r'^Request size \[B\]: ([\.0-9]+)')
		res.request = {
			'req/s': float(req_rate),
			'ms/req': float(req_period),
			'size': float(req_size),
		}

		#------------------------------
		# reply
		reply_rate = grab(r'^Reply rate \[replies/s\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) stddev ([\.0-9]+)')
		reply_time = grab(r'^Reply time \[ms\]: response ([\.0-9]+) transfer ([\.0-9]+)')
		reply_size = grab(r'^Reply size \[B\]: header ([\.0-9]+) content ([\.0-9]+) footer ([\.0-9]+) \(total ([\.0-9]+)\)')
		reply_status = grab(r'^Reply status: 1xx=([0-9]+) 2xx=([0-9]+) 3xx=([0-9]+) 4xx=([0-9]+) 5xx=([0-9]+)')
		res.reply = {
			'rate': dict(zip(('min', 'avg', 'max', 'stddev'), map(float, reply_rate))),
			'time': dict(zip(('response', 'transfer'), map(float, reply_time))),
			'size': dict(zip(('header', 'content', 'footer', 'total'), map(float, reply_size))),
			'status': dict(zip(('1xx', '2xx', '3xx', '4xx', '5xx'), map(int, reply_status))),
		}

		#------------------------------
		# misc
		cpu_user, cpu_sys, pct_user, pct_sys, pct_total = grab(r'^CPU time \[s\]: user ([\.0-9]+) system ([\.0-9]+) \(user ([\.0-9]+)% system ([\.0-9]+)% total ([\.0-9]+)%\)')
		kbps, mantissa, exponent = grab(r'^Net I/O: ([\.0-9]+) KB/s \(([\.0-9]+)\*10\^([0-9]+) bps\)')
		res.misc = {
			'usr': float(cpu_user),
			'sys': float(cpu_sys),
			'usr%': float(pct_user),
			'sys%': float(pct_sys),
			'total%': float(pct_total),
			'KB/S': float(kbps),
			# the report gives bps as mantissa*10^exponent; store the expanded value
			'bps': float(mantissa) * math.pow(10, int(exponent)),
		}

		#------------------------------
		# errors
		error_keys = (
			'total', 'client-timout', 'socket-timout', 'connection-refused', 'connection-reset',
			'fd-unavailable', 'address-unavailable', 'ftab-full', 'other',
		)
		errors_head = grab(r'^Errors: total ([0-9]+) client-timo ([0-9]+) socket-timo ([0-9]+) connrefused ([0-9]+) connreset ([0-9]+)')
		errors_tail = grab(r'^Errors: fd-unavail ([0-9]+) addrunavail ([0-9]+) ftab-full ([0-9]+) other ([0-9]+)')
		res.errors = dict(zip(error_keys, map(int, errors_head + errors_tail)))

		#------------------------------
		# session
		ses_min, ses_avg, ses_max, ses_dev, ses_ok, ses_all = grab(r'^Session rate \[sess/s\]: min ([\.0-9]+) avg ([\.0-9]+) max ([\.0-9]+) stddev ([\.0-9]+) \(([0-9]+)/([0-9]+)\)')
		conns_per_session = grab(r'^Session: avg ([\.0-9]+) connections/session')
		lifetime = grab(r'^Session lifetime \[s\]: ([\.0-9]+)')
		failtime = grab(r'^Session failtime \[s\]: ([\.0-9]+)')
		length_hist = grab(r'^Session length histogram: ([ 0-9]+)')
		res.session = {
			'rate': {
				'min': float(ses_min),
				'avg': float(ses_avg),
				'max': float(ses_max),
				'stddev': float(ses_dev),
			},
			'successes': int(ses_ok),
			'totals': int(ses_all),
			'conns/ses': float(conns_per_session),
			'lifetime': float(lifetime),
			'failtime': float(failtime),
			'hist': hist(length_hist),
		}

		return res

if __name__ == "__main__":
	#------------------------------
	# parse args
	parser = argparse.ArgumentParser(description='Script aggregates httperf output')
	parser.add_argument('files', metavar='files', type=argparse.FileType('r'), nargs='*', help='a list of files to aggregate')

	try:
		args =  parser.parse_args()
	except:
		print('ERROR: invalid arguments', file=sys.stderr)
		parser.print_help(sys.stderr)
		sys.exit(1)

	if len(args.files) == 0:
		print('No input files', file=sys.stderr)
		parser.print_help(sys.stderr)
		sys.exit(1)

	#------------------------------
	# Construct objects
	results = [Result.from_file(f) for f in args.files]

	#==================================================
	# Print
	#==================================================
	totals = dict(functools.reduce(operator.add, map(collections.Counter, [r.total for r in results])))
	totals['duration-'] = min([r.total['duration'] for r in results])
	totals['duration+'] = max([r.total['duration'] for r in results])
	print("")
	print("")
	print("Total: connections {:,} requests {:,} replies {:,} test-duration {}-{} s".format(totals['connections'], totals['requests'], totals['replies'], totals['duration-'], totals['duration+']))
	print("")

	#==================================================
	connections = {
		'conn/s': sum([r.connect['rate']['conn/s'] for r in results]),
		'<=': sum([r.connect['rate']['<='] for r in results]),
		'min': min([r.connect['time']['min'] for r in results]),
		'avg': wavg([r.connect['time']['avg'] for r in results], [r.total['connections'] for r in results]),
		'max': max([r.connect['time']['max'] for r in results]),
		'median': wavg([r.connect['time']['median'] for r in results], [r.total['connections'] for r in results]),
		'stddev': wavg([r.connect['time']['stddev'] for r in results], [r.total['connections'] for r in results]),
		'connect': wavg([r.connect['connect'] for r in results], [r.total['connections'] for r in results]),
		'length': wavg([r.connect['length'] for r in results], [r.total['connections'] for r in results])
	}
	print("Connection rate: {:,.2f} conn/s ({:.2f} ms/conn, <={:,} concurrent connections)".format(connections['conn/s'], 1000.0 / connections['conn/s'], connections['<=']))
	print("Connection time [ms]: min {:,.2f} avg {:,.2f} max {:,.2f} avg median {:,.2f} avg stddev {:,.2f}".format(connections['min'], connections['avg'], connections['max'], connections['median'], connections['stddev']))
	print("Connection time [ms]: connect {:,.2f}".format(connections['connect']))
	print("Connection length [replies/conn]: {:,.2f}".format(connections['length']))
	print("")

	#==================================================
	requests = {
		'req/s': sum([r.request['req/s'] for r in results]),
		'size': wavg([r.request['size'] for r in results], [r.total['requests'] for r in results])
	}
	print("Request rate: {:,.2f} req/s ({:.2f} ms/req)".format(requests['req/s'], 1000.0 / requests['req/s']))
	print("Request size [B]: {:,.2f}".format(requests['size']))
	print("")

	#==================================================
	replies = {
		'min': sum([r.reply['rate']['min'] for r in results]),
		'avg': sum([r.reply['rate']['avg'] for r in results]),
		'max': sum([r.reply['rate']['max'] for r in results]),
		'std':  wavg([r.reply['rate']['stddev'] for r in results], [r.total['replies'] for r in results])
	}
	print("Reply rate [replies/s]: min {:,.2f} avg {:,.2f} max {:,.2f} avg stddev {:,.2f}".format(replies['min'], replies['avg'], replies['max'], replies['std']))
	replies = {
		'rs': wavg([r.reply['time']['response'] for r in results], [r.total['replies'] for r in results]),
		'tr': wavg([r.reply['time']['transfer'] for r in results], [r.total['replies'] for r in results])
	}
	print("Reply time [ms]: response {:,.2f} transfer {:,.2f}".format(replies['rs'], replies['tr']))
	replies = {
		'hd': wavg([r.reply['size']['header' ] for r in results], [r.total['replies'] for r in results]),
		'ct': wavg([r.reply['size']['content'] for r in results], [r.total['replies'] for r in results]),
		'ft': wavg([r.reply['size']['footer' ] for r in results], [r.total['replies'] for r in results]),
		'tt': wavg([r.reply['size']['total'  ] for r in results], [r.total['replies'] for r in results])
	}
	print("Reply size [B]: header {:,.2f} content {:,.2f} footer {:,.2f} (total {:,.2f})".format(replies['hd'], replies['ct'], replies['ft'], replies['tt']))
	replies = {
		'1xx': sum([r.reply['status']['1xx'] for r in results]),
		'2xx': sum([r.reply['status']['2xx'] for r in results]),
		'3xx': sum([r.reply['status']['3xx'] for r in results]),
		'4xx': sum([r.reply['status']['4xx'] for r in results]),
		'5xx': sum([r.reply['status']['5xx'] for r in results])
	}
	print("Reply status: 1xx={:,} 2xx={:,} 3xx={:,} 4xx={:,} 5xx={:,}".format(replies['1xx'], replies['2xx'], replies['3xx'], replies['4xx'], replies['5xx']))
	print("")

	#==================================================
	misc = dict(functools.reduce(operator.add, map(collections.Counter, [r.misc for r in results])))
	print("CPU time [s]: user {:.2f} system {:.2f} (user {:.2f}% system {:.2f}% total {:.2f}%)".format(misc['usr'], misc['sys'], misc['usr%'], misc['sys%'], misc['total%']))
	print("Net I/O: {:,.2f} KB/s ({} bps)".format(misc['KB/S'], decimal.Decimal(misc['bps']).normalize().to_eng_string()))
	print("")

	#==================================================
	errors = dict(functools.reduce(lambda a, b: a.update(b) or a, [r.errors for r in results], collections.Counter()))
	print("Errors: total {} client-timo {} socket-timo {} connrefused {} connreset {}".format(errors['total'], errors['client-timout'], errors['socket-timout'], errors['connection-refused'], errors['connection-reset']))
	print("Errors: fd-unavail {} addrunavail {} ftab-full {} other {}".format(errors['fd-unavailable'], errors['address-unavailable'], errors['ftab-full'], errors['other']))
	print("")

	#==================================================
	sessions = {
		'min': sum([r.session['rate']['min'] for r in results]),
		'avg': wavg([r.session['rate']['avg'] for r in results], [r.session['totals'] for r in results]),
		'max': sum([r.session['rate']['max'] for r in results]),
		'stddev':  wavg([r.session['rate']['stddev'] for r in results], [r.session['totals'] for r in results]),
		'successes': sum([r.session['successes'] for r in results]),
		'totals': sum([r.session['totals'] for r in results]),
		'conns/ses': wavg([r.session['conns/ses'] for r in results], [r.session['totals'] for r in results]),
		'lifetime': wavg([r.session['lifetime'] for r in results], [r.session['successes'] for r in results]),
		'failtime': wavg([r.session['failtime'] for r in results], [r.session['totals'] - r.session['successes'] for r in results]),
	}
	print("Session rate [sess/s]: min {:.2f} avg {:.2f} max {:.2f} avg stddev {:.2f} ({:,}/{:,})".format(sessions['min'], sessions['avg'], sessions['max'], sessions['stddev'], sessions['successes'], sessions['totals']))
	print("Session: avg {:.2f} connections/session".format(sessions['conns/ses']))
	print("Session lifetime [s]: {:.2f}".format(sessions['lifetime']))
	print("Session failtime [s]: {:.2f}".format(sessions['failtime']))

	hist = dict(functools.reduce(operator.add, map(collections.Counter, [r.session['hist'] for r in results])))
	hist = ["{}: {}".format(key, value) for key, value in sorted(hist.items(), key=lambda x: x[0])]
	print("Session length histogram: [{}]".format(", ".join(hist)))