1 | #!/usr/bin/env python3
|
---|
2 |
|
---|
3 | import os
|
---|
4 | import sys
|
---|
5 | import math
|
---|
6 | import argparse
|
---|
7 | import statistics as st
|
---|
8 |
|
---|
9 | # Parsing Logic
|
---|
10 | parser = argparse.ArgumentParser(prog = 'GenConvoyStats',
|
---|
11 | description = 'Analyzes handoff matrix output and uses Markov chain modelling to provide upper and lower bounds on the largest long term convoy',
|
---|
12 | epilog = '')
|
---|
13 |
|
---|
14 | parser.add_argument("Filename")
|
---|
15 | parser.add_argument("-r", "--RunsPerNumThds", default=5, help="Number of trials per # of threads. Default is 5")
|
---|
16 | parser.add_argument("-m", "--MaxThreads", default=32, help="Maximum number of threads. Default is 32")
|
---|
17 | parser.add_argument("-o", "--OutputFile", default='', help="File to write output to")
|
---|
18 | parser.add_argument("-v", "--Verbose", help="Verbose output. Will print per run stats alongside aggregates", action='count', default=0)
|
---|
19 | parser.add_argument("-s", "--Single", help="Indicates that input only contains a single run. Assumes the number of threads is the value passed to -m", action='count', default=0)
|
---|
20 |
|
---|
21 | args = parser.parse_args()
|
---|
22 |
|
---|
23 | # handoff data
|
---|
24 | handoff = []
|
---|
25 | sumPerRow = []
|
---|
26 | handoffTotal = 0
|
---|
27 | maxCycleSum = 0
|
---|
28 | maxCycle = []
|
---|
29 |
|
---|
30 | # per thread data (across runs)
|
---|
31 | minBound = []
|
---|
32 | maxBound = []
|
---|
33 | expected = []
|
---|
34 |
|
---|
35 | # input file descriptor
|
---|
36 | readFile = open(args.Filename, "r")
|
---|
37 |
|
---|
38 | writeFile = False
|
---|
39 | if args.OutputFile != '':
|
---|
40 | writeFile = open(args.Filename, "w")
|
---|
41 |
|
---|
42 | def reset():
|
---|
43 | global handoff, sumPerRow, handoffTotal, readFile, maxCycleSum, maxCycle
|
---|
44 | handoff.clear()
|
---|
45 | sumPerRow.clear()
|
---|
46 | handoffTotal = 0
|
---|
47 | maxCycleSum = 0
|
---|
48 | maxCycle.clear()
|
---|
49 |
|
---|
50 | def thdReset():
|
---|
51 | global minBound, maxBound, expected
|
---|
52 | minBound.clear()
|
---|
53 | maxBound.clear()
|
---|
54 | expected.clear()
|
---|
55 |
|
---|
56 | def output(string):
|
---|
57 | global writeFile
|
---|
58 | if writeFile:
|
---|
59 | writeFile.write(string + '\n')
|
---|
60 |
|
---|
61 | print(string)
|
---|
62 |
|
---|
63 | # reads in handoff matrix for a single run and accumulates row/matrix total at the same time
|
---|
64 | def readInMatrix(currThds):
|
---|
65 | global handoff, sumPerRow, handoffTotal, readFile
|
---|
66 | for i in range(currThds):
|
---|
67 | line = readFile.readline()
|
---|
68 |
|
---|
69 | # Deal with EOF
|
---|
70 | if not line:
|
---|
71 | print("Incorrect arguments or file format: error in readInMatrix")
|
---|
72 | sys.exit(1)
|
---|
73 |
|
---|
74 | # deal with any empty lines
|
---|
75 | while line == '\n':
|
---|
76 | line = readFile.readline()
|
---|
77 |
|
---|
78 | row = []
|
---|
79 | rowSum = 0
|
---|
80 |
|
---|
81 | # convert row into list of ints and accumulate
|
---|
82 | for val in line.replace(',','').split():
|
---|
83 | row.append(int(val))
|
---|
84 | rowSum += int(val)
|
---|
85 |
|
---|
86 | #store row in global state
|
---|
87 | handoff.append(row)
|
---|
88 | sumPerRow.append(rowSum)
|
---|
89 | handoffTotal += rowSum
|
---|
90 |
|
---|
91 | # moves current non empty line in readFile to line after line described by first two chars
|
---|
92 | def goToLineByStartingChars(string):
|
---|
93 | global readFile
|
---|
94 | # find start of relevant data
|
---|
95 | line = ""
|
---|
96 | while True:
|
---|
97 | line = readFile.readline()
|
---|
98 | if not line:
|
---|
99 | print("Incorrect arguments or file format: error in goToLineByStartingChars")
|
---|
100 | sys.exit(1)
|
---|
101 |
|
---|
102 | # strip after checking for EOF so we can distinguish EOF vs empty line
|
---|
103 | line = line.strip()
|
---|
104 |
|
---|
105 | # discard lines until we see the column line in output
|
---|
106 | if line and line[0:len(string)] == string:
|
---|
107 | break
|
---|
108 |
|
---|
109 | # recursively find largest cycle included a specific node using DFS
|
---|
110 | def findLargestCycle(startNode, currNode, visited, globalVisited, currSum, currCycle):
|
---|
111 | global handoff, maxCycle, maxCycleSum
|
---|
112 |
|
---|
113 | # print("CurrNode: " + str(currNode) + " StartNode: " + str(startNode))
|
---|
114 | # print(currCycle)
|
---|
115 | # if we visit a node from a previous call then return since we have already
|
---|
116 | # looked at all cycles containing that node
|
---|
117 | if globalVisited[currNode]:
|
---|
118 | # print("globalVisited")
|
---|
119 | return
|
---|
120 |
|
---|
121 | # found a cycle
|
---|
122 | if visited[currNode]:
|
---|
123 | # if currNode == startNode:
|
---|
124 | # print("LocalVisited, curr: " + str(currSum) + ", max: " + str(maxCycleSum) + ", Start: " + str(startNode) )
|
---|
125 | # if the cycle contains the start node check if it is our new max cycle
|
---|
126 | if currNode == startNode and currSum > maxCycleSum:
|
---|
127 | # print("NewMax")
|
---|
128 | maxCycleSum = currSum
|
---|
129 | maxCycle = currCycle.copy()
|
---|
130 | return
|
---|
131 |
|
---|
132 | visited[currNode] = True
|
---|
133 | # print(visited)
|
---|
134 | for idx, val in enumerate(handoff[currNode]):
|
---|
135 | # continue if no edge
|
---|
136 | # if val == 0:
|
---|
137 | # continue
|
---|
138 |
|
---|
139 | currCycle.append(currNode)
|
---|
140 | findLargestCycle(startNode, idx, visited, globalVisited, currSum + val, currCycle)
|
---|
141 | currCycle.pop()
|
---|
142 | # print(currNode)
|
---|
143 | visited[currNode] = False
|
---|
144 |
|
---|
145 |
|
---|
146 |
|
---|
147 | def analyzeRun():
|
---|
148 | global handoff, sumPerRow, handoffTotal, maxCycle, maxCycleSum, minBound, maxBound, expected
|
---|
149 | currThds = len(handoff)
|
---|
150 |
|
---|
151 |
|
---|
152 | # This is NP-Hard and is currently not tractable so we just estimate the largest cycle
|
---|
153 | # the code to do so is commented below
|
---|
154 |
|
---|
155 | # find largest cycle
|
---|
156 | # globalVisited = [False] * currThds
|
---|
157 | # for i in range(currThds):
|
---|
158 | # # print(i)
|
---|
159 | # visited = [False] * currThds
|
---|
160 | # findLargestCycle(i, i, visited, globalVisited, 0, [])
|
---|
161 | # globalVisited[i] = True
|
---|
162 |
|
---|
163 | # # calculate stats
|
---|
164 | # cycleHandoffs = []
|
---|
165 | # cycleHandoffs.append(handoff[maxCycle[-1]][maxCycle[0]])
|
---|
166 | # sumOfMaxCycleRows = sumPerRow[maxCycle[0]]
|
---|
167 |
|
---|
168 | # # expected handoff is MULT P( handoff ) for each handoff in max cycle
|
---|
169 | # expectedConvoy = handoff[maxCycle[-1]][maxCycle[0]] / sumPerRow[maxCycle[-1]]
|
---|
170 | # for idx, val in enumerate(maxCycle[1:]):
|
---|
171 | # cycleHandoffs.append(handoff[maxCycle[idx]][val])
|
---|
172 | # sumOfMaxCycleRows += sumPerRow[val]
|
---|
173 | # expectedConvoy = expectedConvoy * handoff[maxCycle[idx]][val] / sumPerRow[maxCycle[idx]]
|
---|
174 |
|
---|
175 | # # adjust expected bound
|
---|
176 | # # if max cycle contains all nodes, sumOfMaxCycleRows / handoffTotal == 1
|
---|
177 | # # else this adjusts the expected bound to compensate for the cycle not visiting all nodes
|
---|
178 | # # also mult by 100 to turn into percentage from decimal
|
---|
179 | # expectedConvoy = expectedConvoy * sumOfMaxCycleRows * 100 / handoffTotal
|
---|
180 |
|
---|
181 | ################################
|
---|
182 |
|
---|
183 | #start of approximation code is here:
|
---|
184 |
|
---|
185 | # instead we take the maximum handoff from each row and assume it is all a cycle as an approximation
|
---|
186 | maxCycle = []
|
---|
187 | maxCycleSum = 0
|
---|
188 | cycleHandoffs = []
|
---|
189 | expectedConvoy = 100
|
---|
190 | for i in range(currThds):
|
---|
191 | currMax = max(handoff[i])
|
---|
192 | maxCycle.append(i)
|
---|
193 | cycleHandoffs.append(currMax)
|
---|
194 | maxCycleSum += currMax
|
---|
195 | expectedConvoy = expectedConvoy * currMax / sumPerRow[i]
|
---|
196 |
|
---|
197 | sumOfMaxCycleRows = handoffTotal
|
---|
198 |
|
---|
199 | # end of approximation code
|
---|
200 | ###################################################
|
---|
201 |
|
---|
202 | # upper bound is the percentage of all handoffs that occur in the maximum possible cycle
|
---|
203 | # The cycle is possible min(cycleHandoffs) number of times
|
---|
204 | maxFeasibleHandoff = min(cycleHandoffs)
|
---|
205 | upperBoundConvoy = maxFeasibleHandoff * len(maxCycle) * 100 / handoffTotal
|
---|
206 | lowerBoundConvoy = 1
|
---|
207 | for val in maxCycle:
|
---|
208 | lowerBoundConvoy = lowerBoundConvoy * maxFeasibleHandoff / sumPerRow[val]
|
---|
209 |
|
---|
210 | # adjust lower bound. See comment for expectedConvoy adjustment to explain why
|
---|
211 | lowerBoundConvoy = lowerBoundConvoy * sumOfMaxCycleRows * 100 / handoffTotal
|
---|
212 |
|
---|
213 | maxBound.append(upperBoundConvoy)
|
---|
214 | minBound.append(lowerBoundConvoy)
|
---|
215 | expected.append(expectedConvoy)
|
---|
216 |
|
---|
217 | if args.Verbose or args.Single:
|
---|
218 | output('Convoying bounds: {:.2f}%-{:.2f}%, Expected convoying: {:.2f}%'.format(lowerBoundConvoy,upperBoundConvoy, expectedConvoy))
|
---|
219 |
|
---|
220 |
|
---|
221 | if args.Single:
|
---|
222 | output("N: " + str(args.MaxThreads))
|
---|
223 | goToLineByStartingChars(str(args.MaxThreads)+' ')
|
---|
224 | readInMatrix(int(args.MaxThreads))
|
---|
225 | analyzeRun()
|
---|
226 | reset()
|
---|
227 | else:
|
---|
228 | for i in range(args.MaxThreads):
|
---|
229 | output("N: " + str(i+1))
|
---|
230 |
|
---|
231 | goToLineByStartingChars(str(i+1)+' ')
|
---|
232 | for j in range(args.RunsPerNumThds):
|
---|
233 | readInMatrix(i+1)
|
---|
234 | analyzeRun()
|
---|
235 | reset()
|
---|
236 |
|
---|
237 | output('Mean convoying bounds: {:.2f}%-{:.2f}%, Mean expected convoying: {:.2f}%'.format(st.mean(minBound), st.mean(maxBound),st.mean(expected)))
|
---|
238 | output('Median convoying bounds: {:.2f}%-{:.2f}%, Median expected convoying: {:.2f}%'.format(st.median(minBound), st.median(maxBound),st.median(expected)))
|
---|
239 | output('')
|
---|
240 | thdReset()
|
---|
241 |
|
---|
242 | readFile.close()
|
---|
243 |
|
---|
244 | if writeFile:
|
---|
245 | writeFile.close()
|
---|