Context Navigation

source: benchmark/convoy/genConvoyStats.py@ 917f67dd

Visit:

Last change on this file since 917f67dd was ebbe941, checked in by caparsons <caparson@…>, 3 years ago
changed algorithm to approximation since max cycle finding is NP-hard
Property mode set to `100644`
File size: 8.2 KB

Rev	Line
[8e64cb4]	1	#!/usr/bin/env python3
	2
[93e0603]	3	import os
	4	import sys
	5	import math
	6	import argparse
	7	import statistics as st
	8
	9	# Parsing Logic
	10	parser = argparse.ArgumentParser(prog = 'GenConvoyStats',
	11	description = 'Analyzes handoff matrix output and uses Markov chain modelling to provide upper and lower bounds on the largest long term convoy',
	12	epilog = '')
	13
	14	parser.add_argument("Filename")
[fd6e3a4]	15	parser.add_argument("-r", "--RunsPerNumThds", default=5, help="Number of trials per # of threads. Default is 5")
[93e0603]	16	parser.add_argument("-m", "--MaxThreads", default=32, help="Maximum number of threads. Default is 32")
	17	parser.add_argument("-o", "--OutputFile", default='', help="File to write output to")
	18	parser.add_argument("-v", "--Verbose", help="Verbose output. Will print per run stats alongside aggregates", action='count', default=0)
[8e64cb4]	19	parser.add_argument("-s", "--Single", help="Indicates that input only contains a single run. Assumes the number of threads is the value passed to -m", action='count', default=0)
[93e0603]	20
	21	args = parser.parse_args()
	22
	23	# handoff data
	24	handoff = []
	25	sumPerRow = []
	26	handoffTotal = 0
	27	maxCycleSum = 0
	28	maxCycle = []
	29
	30	# per thread data (across runs)
	31	minBound = []
	32	maxBound = []
	33	expected = []
	34
	35	# input file descriptor
	36	readFile = open(args.Filename, "r")
	37
	38	writeFile = False
	39	if args.OutputFile != '':
	40	writeFile = open(args.Filename, "w")
	41
	42	def reset():
	43	global handoff, sumPerRow, handoffTotal, readFile, maxCycleSum, maxCycle
	44	handoff.clear()
	45	sumPerRow.clear()
	46	handoffTotal = 0
	47	maxCycleSum = 0
	48	maxCycle.clear()
	49
	50	def thdReset():
	51	global minBound, maxBound, expected
	52	minBound.clear()
	53	maxBound.clear()
	54	expected.clear()
	55
	56	def output(string):
	57	global writeFile
	58	if writeFile:
	59	writeFile.write(string + '\n')
	60
	61	print(string)
	62
	63	# reads in handoff matrix for a single run and accumulates row/matrix total at the same time
	64	def readInMatrix(currThds):
	65	global handoff, sumPerRow, handoffTotal, readFile
	66	for i in range(currThds):
	67	line = readFile.readline()
	68
	69	# Deal with EOF
	70	if not line:
	71	print("Incorrect arguments or file format: error in readInMatrix")
	72	sys.exit(1)
	73
	74	# deal with any empty lines
	75	while line == '\n':
	76	line = readFile.readline()
	77
	78	row = []
	79	rowSum = 0
	80
	81	# convert row into list of ints and accumulate
	82	for val in line.replace(',','').split():
	83	row.append(int(val))
	84	rowSum += int(val)
	85
	86	#store row in global state
	87	handoff.append(row)
	88	sumPerRow.append(rowSum)
	89	handoffTotal += rowSum
	90
	91	# moves current non empty line in readFile to line after line described by first two chars
	92	def goToLineByStartingChars(string):
	93	global readFile
	94	# find start of relevant data
	95	line = ""
	96	while True:
	97	line = readFile.readline()
	98	if not line:
	99	print("Incorrect arguments or file format: error in goToLineByStartingChars")
	100	sys.exit(1)
	101
	102	# strip after checking for EOF so we can distinguish EOF vs empty line
	103	line = line.strip()
	104
	105	# discard lines until we see the column line in output
	106	if line and line[0:len(string)] == string:
	107	break
	108
	109	# recursively find largest cycle included a specific node using DFS
	110	def findLargestCycle(startNode, currNode, visited, globalVisited, currSum, currCycle):
	111	global handoff, maxCycle, maxCycleSum
	112
[ebbe941]	113	# print("CurrNode: " + str(currNode) + " StartNode: " + str(startNode))
[93e0603]	114	# print(currCycle)
	115	# if we visit a node from a previous call then return since we have already
	116	# looked at all cycles containing that node
	117	if globalVisited[currNode]:
	118	# print("globalVisited")
	119	return
	120
	121	# found a cycle
	122	if visited[currNode]:
[ebbe941]	123	# if currNode == startNode:
[93e0603]	124	# print("LocalVisited, curr: " + str(currSum) + ", max: " + str(maxCycleSum) + ", Start: " + str(startNode) )
	125	# if the cycle contains the start node check if it is our new max cycle
	126	if currNode == startNode and currSum > maxCycleSum:
	127	# print("NewMax")
	128	maxCycleSum = currSum
	129	maxCycle = currCycle.copy()
	130	return
	131
	132	visited[currNode] = True
[ebbe941]	133	# print(visited)
[93e0603]	134	for idx, val in enumerate(handoff[currNode]):
	135	# continue if no edge
[ebbe941]	136	# if val == 0:
	137	# continue
[93e0603]	138
	139	currCycle.append(currNode)
	140	findLargestCycle(startNode, idx, visited, globalVisited, currSum + val, currCycle)
	141	currCycle.pop()
[ebbe941]	142	# print(currNode)
[93e0603]	143	visited[currNode] = False
	144
	145
	146
	147	def analyzeRun():
	148	global handoff, sumPerRow, handoffTotal, maxCycle, maxCycleSum, minBound, maxBound, expected
	149	currThds = len(handoff)
	150
[ebbe941]	151
	152	# This is NP-Hard and is currently not tractable so we just estimate the largest cycle
	153	# the code to do so is commented below
	154
	155	# find largest cycle
	156	# globalVisited = [False] * currThds
	157	# for i in range(currThds):
	158	# # print(i)
	159	# visited = [False] * currThds
	160	# findLargestCycle(i, i, visited, globalVisited, 0, [])
	161	# globalVisited[i] = True
	162
	163	# # calculate stats
	164	# cycleHandoffs = []
	165	# cycleHandoffs.append(handoff[maxCycle[-1]][maxCycle[0]])
	166	# sumOfMaxCycleRows = sumPerRow[maxCycle[0]]
	167
	168	# # expected handoff is MULT P( handoff ) for each handoff in max cycle
	169	# expectedConvoy = handoff[maxCycle[-1]][maxCycle[0]] / sumPerRow[maxCycle[-1]]
	170	# for idx, val in enumerate(maxCycle[1:]):
	171	# cycleHandoffs.append(handoff[maxCycle[idx]][val])
	172	# sumOfMaxCycleRows += sumPerRow[val]
	173	# expectedConvoy = expectedConvoy * handoff[maxCycle[idx]][val] / sumPerRow[maxCycle[idx]]
	174
	175	# # adjust expected bound
	176	# # if max cycle contains all nodes, sumOfMaxCycleRows / handoffTotal == 1
	177	# # else this adjusts the expected bound to compensate for the cycle not visiting all nodes
	178	# # also mult by 100 to turn into percentage from decimal
	179	# expectedConvoy = expectedConvoy * sumOfMaxCycleRows * 100 / handoffTotal
	180
	181	################################
	182
	183	#start of approximation code is here:
	184
	185	# instead we take the maximum handoff from each row and assume it is all a cycle as an approximation
	186	maxCycle = []
	187	maxCycleSum = 0
[93e0603]	188	cycleHandoffs = []
[ebbe941]	189	expectedConvoy = 100
	190	for i in range(currThds):
	191	currMax = max(handoff[i])
	192	maxCycle.append(i)
	193	cycleHandoffs.append(currMax)
	194	maxCycleSum += currMax
	195	expectedConvoy = expectedConvoy * currMax / sumPerRow[i]
	196
	197	sumOfMaxCycleRows = handoffTotal
	198
	199	# end of approximation code
	200	###################################################
[93e0603]	201
	202	# upper bound is the percentage of all handoffs that occur in the maximum possible cycle
	203	# The cycle is possible min(cycleHandoffs) number of times
	204	maxFeasibleHandoff = min(cycleHandoffs)
	205	upperBoundConvoy = maxFeasibleHandoff * len(maxCycle) * 100 / handoffTotal
	206	lowerBoundConvoy = 1
	207	for val in maxCycle:
	208	lowerBoundConvoy = lowerBoundConvoy * maxFeasibleHandoff / sumPerRow[val]
	209
	210	# adjust lower bound. See comment for expectedConvoy adjustment to explain why
	211	lowerBoundConvoy = lowerBoundConvoy * sumOfMaxCycleRows * 100 / handoffTotal
	212
	213	maxBound.append(upperBoundConvoy)
	214	minBound.append(lowerBoundConvoy)
	215	expected.append(expectedConvoy)
	216
[8e64cb4]	217	if args.Verbose or args.Single:
[93e0603]	218	output('Convoying bounds: {:.2f}%-{:.2f}%, Expected convoying: {:.2f}%'.format(lowerBoundConvoy,upperBoundConvoy, expectedConvoy))
	219
	220
[8e64cb4]	221	if args.Single:
	222	output("N: " + str(args.MaxThreads))
	223	goToLineByStartingChars(str(args.MaxThreads)+' ')
	224	readInMatrix(int(args.MaxThreads))
	225	analyzeRun()
	226	reset()
	227	else:
	228	for i in range(args.MaxThreads):
	229	output("N: " + str(i+1))
	230
	231	goToLineByStartingChars(str(i+1)+' ')
	232	for j in range(args.RunsPerNumThds):
	233	readInMatrix(i+1)
	234	analyzeRun()
	235	reset()
	236
	237	output('Mean convoying bounds: {:.2f}%-{:.2f}%, Mean expected convoying: {:.2f}%'.format(st.mean(minBound), st.mean(maxBound),st.mean(expected)))
	238	output('Median convoying bounds: {:.2f}%-{:.2f}%, Median expected convoying: {:.2f}%'.format(st.median(minBound), st.median(maxBound),st.median(expected)))
	239	output('')
	240	thdReset()
[93e0603]	241
	242	readFile.close()
	243
	244	if writeFile:
	245	writeFile.close()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: