| 1 | #!/usr/bin/env python3 | 
|---|
| 2 |  | 
|---|
| 3 | import os | 
|---|
| 4 | import sys | 
|---|
| 5 | import math | 
|---|
| 6 | import argparse | 
|---|
| 7 | import statistics as st | 
|---|
| 8 |  | 
|---|
| 9 | # Parsing Logic | 
|---|
| 10 | parser = argparse.ArgumentParser(prog = 'GenConvoyStats', | 
|---|
| 11 | description = 'Analyzes handoff matrix output and uses Markov chain modelling to provide upper and lower bounds on the largest long term convoy', | 
|---|
| 12 | epilog = '') | 
|---|
| 13 |  | 
|---|
| 14 | parser.add_argument("Filename") | 
|---|
| 15 | parser.add_argument("-r", "--RunsPerNumThds", default=5, help="Number of trials per # of threads. Default is 5") | 
|---|
| 16 | parser.add_argument("-m", "--MaxThreads", default=32, help="Maximum number of threads. Default is 32") | 
|---|
| 17 | parser.add_argument("-o", "--OutputFile", default='', help="File to write output to") | 
|---|
| 18 | parser.add_argument("-v", "--Verbose", help="Verbose output. Will print per run stats alongside aggregates", action='count', default=0) | 
|---|
| 19 | parser.add_argument("-s", "--Single", help="Indicates that input only contains a single run. Assumes the number of threads is the value passed to -m", action='count', default=0) | 
|---|
| 20 |  | 
|---|
| 21 | args = parser.parse_args() | 
|---|
| 22 |  | 
|---|
| 23 | # handoff data | 
|---|
| 24 | handoff = [] | 
|---|
| 25 | sumPerRow = [] | 
|---|
| 26 | handoffTotal = 0 | 
|---|
| 27 | maxCycleSum = 0 | 
|---|
| 28 | maxCycle = [] | 
|---|
| 29 |  | 
|---|
| 30 | # per thread data (across runs) | 
|---|
| 31 | minBound = [] | 
|---|
| 32 | maxBound = [] | 
|---|
| 33 | expected = [] | 
|---|
| 34 |  | 
|---|
| 35 | # input file descriptor | 
|---|
| 36 | readFile = open(args.Filename, "r") | 
|---|
| 37 |  | 
|---|
| 38 | writeFile = False | 
|---|
| 39 | if args.OutputFile != '': | 
|---|
| 40 | writeFile = open(args.Filename, "w") | 
|---|
| 41 |  | 
|---|
| 42 | def reset(): | 
|---|
| 43 | global handoff, sumPerRow, handoffTotal, readFile, maxCycleSum, maxCycle | 
|---|
| 44 | handoff.clear() | 
|---|
| 45 | sumPerRow.clear() | 
|---|
| 46 | handoffTotal = 0 | 
|---|
| 47 | maxCycleSum = 0 | 
|---|
| 48 | maxCycle.clear() | 
|---|
| 49 |  | 
|---|
| 50 | def thdReset(): | 
|---|
| 51 | global minBound, maxBound, expected | 
|---|
| 52 | minBound.clear() | 
|---|
| 53 | maxBound.clear() | 
|---|
| 54 | expected.clear() | 
|---|
| 55 |  | 
|---|
| 56 | def output(string): | 
|---|
| 57 | global writeFile | 
|---|
| 58 | if writeFile: | 
|---|
| 59 | writeFile.write(string + '\n') | 
|---|
| 60 |  | 
|---|
| 61 | print(string) | 
|---|
| 62 |  | 
|---|
| 63 | # reads in handoff matrix for a single run and accumulates row/matrix total at the same time | 
|---|
| 64 | def readInMatrix(currThds): | 
|---|
| 65 | global handoff, sumPerRow, handoffTotal, readFile | 
|---|
| 66 | for i in range(currThds): | 
|---|
| 67 | line = readFile.readline() | 
|---|
| 68 |  | 
|---|
| 69 | # Deal with EOF | 
|---|
| 70 | if not line: | 
|---|
| 71 | print("Incorrect arguments or file format: error in readInMatrix") | 
|---|
| 72 | sys.exit(1) | 
|---|
| 73 |  | 
|---|
| 74 | # deal with any empty lines | 
|---|
| 75 | while line == '\n': | 
|---|
| 76 | line = readFile.readline() | 
|---|
| 77 |  | 
|---|
| 78 | row = [] | 
|---|
| 79 | rowSum = 0 | 
|---|
| 80 |  | 
|---|
| 81 | # convert row into list of ints and accumulate | 
|---|
| 82 | for val in line.replace(',','').split(): | 
|---|
| 83 | row.append(int(val)) | 
|---|
| 84 | rowSum += int(val) | 
|---|
| 85 |  | 
|---|
| 86 | #store row in global state | 
|---|
| 87 | handoff.append(row) | 
|---|
| 88 | sumPerRow.append(rowSum) | 
|---|
| 89 | handoffTotal += rowSum | 
|---|
| 90 |  | 
|---|
| 91 | # moves current non empty line in readFile to line after line described by first two chars | 
|---|
| 92 | def goToLineByStartingChars(string): | 
|---|
| 93 | global readFile | 
|---|
| 94 | # find start of relevant data | 
|---|
| 95 | line = "" | 
|---|
| 96 | while True: | 
|---|
| 97 | line = readFile.readline() | 
|---|
| 98 | if not line: | 
|---|
| 99 | print("Incorrect arguments or file format: error in goToLineByStartingChars") | 
|---|
| 100 | sys.exit(1) | 
|---|
| 101 |  | 
|---|
| 102 | # strip after checking for EOF so we can distinguish EOF vs empty line | 
|---|
| 103 | line = line.strip() | 
|---|
| 104 |  | 
|---|
| 105 | # discard lines until we see the column line in output | 
|---|
| 106 | if line and line[0:len(string)] == string: | 
|---|
| 107 | break | 
|---|
| 108 |  | 
|---|
| 109 | # recursively find largest cycle included a specific node using DFS | 
|---|
| 110 | def findLargestCycle(startNode, currNode, visited, globalVisited, currSum, currCycle): | 
|---|
| 111 | global handoff, maxCycle, maxCycleSum | 
|---|
| 112 |  | 
|---|
| 113 | # print("CurrNode: " + str(currNode) + " StartNode: " + str(startNode)) | 
|---|
| 114 | # print(currCycle) | 
|---|
| 115 | # if we visit a node from a previous call then return since we have already | 
|---|
| 116 | #   looked at all cycles containing that node | 
|---|
| 117 | if globalVisited[currNode]: | 
|---|
| 118 | # print("globalVisited") | 
|---|
| 119 | return | 
|---|
| 120 |  | 
|---|
| 121 | # found a cycle | 
|---|
| 122 | if visited[currNode]: | 
|---|
| 123 | # if currNode == startNode: | 
|---|
| 124 | # print("LocalVisited, curr: " + str(currSum) + ", max: " + str(maxCycleSum) + ", Start: " + str(startNode) ) | 
|---|
| 125 | # if the cycle contains the start node check if it is our new max cycle | 
|---|
| 126 | if currNode == startNode and currSum > maxCycleSum: | 
|---|
| 127 | # print("NewMax") | 
|---|
| 128 | maxCycleSum = currSum | 
|---|
| 129 | maxCycle = currCycle.copy() | 
|---|
| 130 | return | 
|---|
| 131 |  | 
|---|
| 132 | visited[currNode] = True | 
|---|
| 133 | # print(visited) | 
|---|
| 134 | for idx, val in enumerate(handoff[currNode]): | 
|---|
| 135 | # continue if no edge | 
|---|
| 136 | # if val == 0: | 
|---|
| 137 | #     continue | 
|---|
| 138 |  | 
|---|
| 139 | currCycle.append(currNode) | 
|---|
| 140 | findLargestCycle(startNode, idx, visited, globalVisited, currSum + val, currCycle) | 
|---|
| 141 | currCycle.pop() | 
|---|
| 142 | # print(currNode) | 
|---|
| 143 | visited[currNode] = False | 
|---|
| 144 |  | 
|---|
| 145 |  | 
|---|
| 146 |  | 
|---|
| 147 | def analyzeRun(): | 
|---|
| 148 | global handoff, sumPerRow, handoffTotal, maxCycle, maxCycleSum, minBound, maxBound, expected | 
|---|
| 149 | currThds = len(handoff) | 
|---|
| 150 |  | 
|---|
| 151 |  | 
|---|
| 152 | # This is NP-Hard and is currently not tractable so we just estimate the largest cycle | 
|---|
| 153 | # the code to do so is commented below | 
|---|
| 154 |  | 
|---|
| 155 | # find largest cycle | 
|---|
| 156 | # globalVisited = [False] * currThds | 
|---|
| 157 | # for i in range(currThds): | 
|---|
| 158 | #     # print(i) | 
|---|
| 159 | #     visited = [False] * currThds | 
|---|
| 160 | #     findLargestCycle(i, i, visited, globalVisited, 0, []) | 
|---|
| 161 | #     globalVisited[i] = True | 
|---|
| 162 |  | 
|---|
| 163 | # # calculate stats | 
|---|
| 164 | # cycleHandoffs = [] | 
|---|
| 165 | # cycleHandoffs.append(handoff[maxCycle[-1]][maxCycle[0]]) | 
|---|
| 166 | # sumOfMaxCycleRows = sumPerRow[maxCycle[0]] | 
|---|
| 167 |  | 
|---|
| 168 | # # expected handoff is MULT P( handoff ) for each handoff in max cycle | 
|---|
| 169 | # expectedConvoy = handoff[maxCycle[-1]][maxCycle[0]] / sumPerRow[maxCycle[-1]] | 
|---|
| 170 | # for idx, val in enumerate(maxCycle[1:]): | 
|---|
| 171 | #     cycleHandoffs.append(handoff[maxCycle[idx]][val]) | 
|---|
| 172 | #     sumOfMaxCycleRows += sumPerRow[val] | 
|---|
| 173 | #     expectedConvoy = expectedConvoy * handoff[maxCycle[idx]][val] / sumPerRow[maxCycle[idx]] | 
|---|
| 174 |  | 
|---|
| 175 | # # adjust expected bound | 
|---|
| 176 | # # if max cycle contains all nodes, sumOfMaxCycleRows / handoffTotal == 1 | 
|---|
| 177 | # # else this adjusts the expected bound to compensate for the cycle not visiting all nodes | 
|---|
| 178 | # # also mult by 100 to turn into percentage from decimal | 
|---|
| 179 | # expectedConvoy = expectedConvoy * sumOfMaxCycleRows * 100 / handoffTotal | 
|---|
| 180 |  | 
|---|
| 181 | ################################ | 
|---|
| 182 |  | 
|---|
| 183 | #start of approximation code is here: | 
|---|
| 184 |  | 
|---|
| 185 | # instead we take the maximum handoff from each row and assume it is all a cycle as an approximation | 
|---|
| 186 | maxCycle = [] | 
|---|
| 187 | maxCycleSum = 0 | 
|---|
| 188 | cycleHandoffs = [] | 
|---|
| 189 | expectedConvoy = 100 | 
|---|
| 190 | for i in range(currThds): | 
|---|
| 191 | currMax = max(handoff[i]) | 
|---|
| 192 | maxCycle.append(i) | 
|---|
| 193 | cycleHandoffs.append(currMax) | 
|---|
| 194 | maxCycleSum += currMax | 
|---|
| 195 | expectedConvoy = expectedConvoy * currMax / sumPerRow[i] | 
|---|
| 196 |  | 
|---|
| 197 | sumOfMaxCycleRows = handoffTotal | 
|---|
| 198 |  | 
|---|
| 199 | # end of approximation code | 
|---|
| 200 | ################################################### | 
|---|
| 201 |  | 
|---|
| 202 | # upper bound is the percentage of all handoffs that occur in the maximum possible cycle | 
|---|
| 203 | # The cycle is possible min(cycleHandoffs) number of times | 
|---|
| 204 | maxFeasibleHandoff = min(cycleHandoffs) | 
|---|
| 205 | upperBoundConvoy = maxFeasibleHandoff * len(maxCycle) * 100 / handoffTotal | 
|---|
| 206 | lowerBoundConvoy = 1 | 
|---|
| 207 | for val in maxCycle: | 
|---|
| 208 | lowerBoundConvoy = lowerBoundConvoy * maxFeasibleHandoff / sumPerRow[val] | 
|---|
| 209 |  | 
|---|
| 210 | # adjust lower bound. See comment for expectedConvoy adjustment to explain why | 
|---|
| 211 | lowerBoundConvoy = lowerBoundConvoy * sumOfMaxCycleRows * 100 / handoffTotal | 
|---|
| 212 |  | 
|---|
| 213 | maxBound.append(upperBoundConvoy) | 
|---|
| 214 | minBound.append(lowerBoundConvoy) | 
|---|
| 215 | expected.append(expectedConvoy) | 
|---|
| 216 |  | 
|---|
| 217 | if args.Verbose or args.Single: | 
|---|
| 218 | output('Convoying bounds: {:.2f}%-{:.2f}%, Expected convoying: {:.2f}%'.format(lowerBoundConvoy,upperBoundConvoy, expectedConvoy)) | 
|---|
| 219 |  | 
|---|
| 220 |  | 
|---|
| 221 | if args.Single: | 
|---|
| 222 | output("N: " + str(args.MaxThreads)) | 
|---|
| 223 | goToLineByStartingChars(str(args.MaxThreads)+' ') | 
|---|
| 224 | readInMatrix(int(args.MaxThreads)) | 
|---|
| 225 | analyzeRun() | 
|---|
| 226 | reset() | 
|---|
| 227 | else: | 
|---|
| 228 | for i in range(args.MaxThreads): | 
|---|
| 229 | output("N: " + str(i+1)) | 
|---|
| 230 |  | 
|---|
| 231 | goToLineByStartingChars(str(i+1)+' ') | 
|---|
| 232 | for j in range(args.RunsPerNumThds): | 
|---|
| 233 | readInMatrix(i+1) | 
|---|
| 234 | analyzeRun() | 
|---|
| 235 | reset() | 
|---|
| 236 |  | 
|---|
| 237 | output('Mean convoying bounds: {:.2f}%-{:.2f}%, Mean expected convoying: {:.2f}%'.format(st.mean(minBound), st.mean(maxBound),st.mean(expected))) | 
|---|
| 238 | output('Median convoying bounds: {:.2f}%-{:.2f}%, Median expected convoying: {:.2f}%'.format(st.median(minBound), st.median(maxBound),st.median(expected))) | 
|---|
| 239 | output('') | 
|---|
| 240 | thdReset() | 
|---|
| 241 |  | 
|---|
| 242 | readFile.close() | 
|---|
| 243 |  | 
|---|
| 244 | if writeFile: | 
|---|
| 245 | writeFile.close() | 
|---|