[93e0603] | 1 | import os |
---|
| 2 | import sys |
---|
| 3 | import math |
---|
| 4 | import argparse |
---|
| 5 | import statistics as st |
---|
| 6 | |
---|
| 7 | # Parsing Logic |
---|
| 8 | parser = argparse.ArgumentParser(prog = 'GenConvoyStats', |
---|
| 9 | description = 'Analyzes handoff matrix output and uses Markov chain modelling to provide upper and lower bounds on the largest long term convoy', |
---|
| 10 | epilog = '') |
---|
| 11 | |
---|
| 12 | parser.add_argument("Filename") |
---|
| 13 | parser.add_argument("-r", "--RunsPerSize", default=5, help="Number of trials per # of threads. Default is 5") |
---|
| 14 | parser.add_argument("-m", "--MaxThreads", default=32, help="Maximum number of threads. Default is 32") |
---|
| 15 | parser.add_argument("-o", "--OutputFile", default='', help="File to write output to") |
---|
| 16 | parser.add_argument("-v", "--Verbose", help="Verbose output. Will print per run stats alongside aggregates", action='count', default=0) |
---|
| 17 | |
---|
| 18 | args = parser.parse_args() |
---|
| 19 | |
---|
| 20 | # handoff data |
---|
| 21 | handoff = [] |
---|
| 22 | sumPerRow = [] |
---|
| 23 | handoffTotal = 0 |
---|
| 24 | maxCycleSum = 0 |
---|
| 25 | maxCycle = [] |
---|
| 26 | |
---|
| 27 | # per thread data (across runs) |
---|
| 28 | minBound = [] |
---|
| 29 | maxBound = [] |
---|
| 30 | expected = [] |
---|
| 31 | |
---|
| 32 | # input file descriptor |
---|
| 33 | readFile = open(args.Filename, "r") |
---|
| 34 | |
---|
| 35 | writeFile = False |
---|
| 36 | if args.OutputFile != '': |
---|
| 37 | writeFile = open(args.Filename, "w") |
---|
| 38 | |
---|
| 39 | def reset(): |
---|
| 40 | global handoff, sumPerRow, handoffTotal, readFile, maxCycleSum, maxCycle |
---|
| 41 | handoff.clear() |
---|
| 42 | sumPerRow.clear() |
---|
| 43 | handoffTotal = 0 |
---|
| 44 | maxCycleSum = 0 |
---|
| 45 | maxCycle.clear() |
---|
| 46 | |
---|
| 47 | def thdReset(): |
---|
| 48 | global minBound, maxBound, expected |
---|
| 49 | minBound.clear() |
---|
| 50 | maxBound.clear() |
---|
| 51 | expected.clear() |
---|
| 52 | |
---|
| 53 | def output(string): |
---|
| 54 | global writeFile |
---|
| 55 | if writeFile: |
---|
| 56 | writeFile.write(string + '\n') |
---|
| 57 | |
---|
| 58 | print(string) |
---|
| 59 | |
---|
| 60 | # reads in handoff matrix for a single run and accumulates row/matrix total at the same time |
---|
| 61 | def readInMatrix(currThds): |
---|
| 62 | global handoff, sumPerRow, handoffTotal, readFile |
---|
| 63 | for i in range(currThds): |
---|
| 64 | line = readFile.readline() |
---|
| 65 | |
---|
| 66 | # Deal with EOF |
---|
| 67 | if not line: |
---|
| 68 | print("Incorrect arguments or file format: error in readInMatrix") |
---|
| 69 | sys.exit(1) |
---|
| 70 | |
---|
| 71 | # deal with any empty lines |
---|
| 72 | while line == '\n': |
---|
| 73 | line = readFile.readline() |
---|
| 74 | |
---|
| 75 | row = [] |
---|
| 76 | rowSum = 0 |
---|
| 77 | |
---|
| 78 | # convert row into list of ints and accumulate |
---|
| 79 | for val in line.replace(',','').split(): |
---|
| 80 | row.append(int(val)) |
---|
| 81 | rowSum += int(val) |
---|
| 82 | |
---|
| 83 | #store row in global state |
---|
| 84 | handoff.append(row) |
---|
| 85 | sumPerRow.append(rowSum) |
---|
| 86 | handoffTotal += rowSum |
---|
| 87 | |
---|
| 88 | # moves current non empty line in readFile to line after line described by first two chars |
---|
| 89 | def goToLineByStartingChars(string): |
---|
| 90 | global readFile |
---|
| 91 | # find start of relevant data |
---|
| 92 | line = "" |
---|
| 93 | while True: |
---|
| 94 | line = readFile.readline() |
---|
| 95 | if not line: |
---|
| 96 | print("Incorrect arguments or file format: error in goToLineByStartingChars") |
---|
| 97 | sys.exit(1) |
---|
| 98 | |
---|
| 99 | # strip after checking for EOF so we can distinguish EOF vs empty line |
---|
| 100 | line = line.strip() |
---|
| 101 | |
---|
| 102 | # discard lines until we see the column line in output |
---|
| 103 | if line and line[0:len(string)] == string: |
---|
| 104 | break |
---|
| 105 | |
---|
| 106 | # recursively find largest cycle included a specific node using DFS |
---|
| 107 | def findLargestCycle(startNode, currNode, visited, globalVisited, currSum, currCycle): |
---|
| 108 | global handoff, maxCycle, maxCycleSum |
---|
| 109 | |
---|
| 110 | # print("CurrNode: " + str(currNode)) |
---|
| 111 | # print(currCycle) |
---|
| 112 | # if we visit a node from a previous call then return since we have already |
---|
| 113 | # looked at all cycles containing that node |
---|
| 114 | if globalVisited[currNode]: |
---|
| 115 | # print("globalVisited") |
---|
| 116 | return |
---|
| 117 | |
---|
| 118 | # found a cycle |
---|
| 119 | if visited[currNode]: |
---|
| 120 | # print("LocalVisited, curr: " + str(currSum) + ", max: " + str(maxCycleSum) + ", Start: " + str(startNode) ) |
---|
| 121 | # if the cycle contains the start node check if it is our new max cycle |
---|
| 122 | if currNode == startNode and currSum > maxCycleSum: |
---|
| 123 | # print("NewMax") |
---|
| 124 | maxCycleSum = currSum |
---|
| 125 | maxCycle = currCycle.copy() |
---|
| 126 | return |
---|
| 127 | |
---|
| 128 | visited[currNode] = True |
---|
| 129 | |
---|
| 130 | for idx, val in enumerate(handoff[currNode]): |
---|
| 131 | # continue if no edge |
---|
| 132 | if val == 0: |
---|
| 133 | continue |
---|
| 134 | |
---|
| 135 | currCycle.append(currNode) |
---|
| 136 | findLargestCycle(startNode, idx, visited, globalVisited, currSum + val, currCycle) |
---|
| 137 | currCycle.pop() |
---|
| 138 | |
---|
| 139 | visited[currNode] = False |
---|
| 140 | |
---|
| 141 | |
---|
| 142 | |
---|
| 143 | def analyzeRun(): |
---|
| 144 | global handoff, sumPerRow, handoffTotal, maxCycle, maxCycleSum, minBound, maxBound, expected |
---|
| 145 | currThds = len(handoff) |
---|
| 146 | |
---|
| 147 | # find largest cycle |
---|
| 148 | globalVisited = [False] * currThds |
---|
| 149 | for i in range(currThds): |
---|
| 150 | visited = [False] * currThds |
---|
| 151 | findLargestCycle(i, i, visited, globalVisited, 0, []) |
---|
| 152 | globalVisited[i] = True |
---|
| 153 | |
---|
| 154 | # calculate stats |
---|
| 155 | cycleHandoffs = [] |
---|
| 156 | cycleHandoffs.append(handoff[maxCycle[-1]][maxCycle[0]]) |
---|
| 157 | sumOfMaxCycleRows = sumPerRow[maxCycle[0]] |
---|
| 158 | |
---|
| 159 | # expected handoff is MULT P( handoff ) for each handoff in max cycle |
---|
| 160 | expectedConvoy = handoff[maxCycle[-1]][maxCycle[0]] / sumPerRow[maxCycle[-1]] |
---|
| 161 | for idx, val in enumerate(maxCycle[1:]): |
---|
| 162 | cycleHandoffs.append(handoff[maxCycle[idx]][val]) |
---|
| 163 | sumOfMaxCycleRows += sumPerRow[val] |
---|
| 164 | expectedConvoy = expectedConvoy * handoff[maxCycle[idx]][val] / sumPerRow[maxCycle[idx]] |
---|
| 165 | |
---|
| 166 | # adjust expected bound |
---|
| 167 | # if max cycle contains all nodes, sumOfMaxCycleRows / handoffTotal == 1 |
---|
| 168 | # else this adjusts the expected bound to compensate for the cycle not visiting all nodes |
---|
| 169 | # also mult by 100 to turn into percentage from decimal |
---|
| 170 | expectedConvoy = expectedConvoy * sumOfMaxCycleRows * 100 / handoffTotal |
---|
| 171 | |
---|
| 172 | # upper bound is the percentage of all handoffs that occur in the maximum possible cycle |
---|
| 173 | # The cycle is possible min(cycleHandoffs) number of times |
---|
| 174 | maxFeasibleHandoff = min(cycleHandoffs) |
---|
| 175 | upperBoundConvoy = maxFeasibleHandoff * len(maxCycle) * 100 / handoffTotal |
---|
| 176 | lowerBoundConvoy = 1 |
---|
| 177 | for val in maxCycle: |
---|
| 178 | lowerBoundConvoy = lowerBoundConvoy * maxFeasibleHandoff / sumPerRow[val] |
---|
| 179 | |
---|
| 180 | # adjust lower bound. See comment for expectedConvoy adjustment to explain why |
---|
| 181 | lowerBoundConvoy = lowerBoundConvoy * sumOfMaxCycleRows * 100 / handoffTotal |
---|
| 182 | |
---|
| 183 | maxBound.append(upperBoundConvoy) |
---|
| 184 | minBound.append(lowerBoundConvoy) |
---|
| 185 | expected.append(expectedConvoy) |
---|
| 186 | |
---|
| 187 | if args.Verbose: |
---|
| 188 | output('Convoying bounds: {:.2f}%-{:.2f}%, Expected convoying: {:.2f}%'.format(lowerBoundConvoy,upperBoundConvoy, expectedConvoy)) |
---|
| 189 | |
---|
| 190 | |
---|
| 191 | for i in range(args.MaxThreads): |
---|
| 192 | output("N: " + str(i+1)) |
---|
| 193 | |
---|
| 194 | goToLineByStartingChars(str(i+1)+' ') |
---|
| 195 | for j in range(args.RunsPerSize): |
---|
| 196 | readInMatrix(i+1) |
---|
| 197 | analyzeRun() |
---|
| 198 | reset() |
---|
| 199 | |
---|
| 200 | output('Mean convoying bounds: {:.2f}%-{:.2f}%, Mean expected convoying: {:.2f}%'.format(st.mean(minBound), st.mean(maxBound),st.mean(expected))) |
---|
| 201 | output('Median convoying bounds: {:.2f}%-{:.2f}%, Median expected convoying: {:.2f}%'.format(st.median(minBound), st.median(maxBound),st.median(expected))) |
---|
| 202 | output('') |
---|
| 203 | thdReset() |
---|
| 204 | |
---|
| 205 | readFile.close() |
---|
| 206 | |
---|
| 207 | if writeFile: |
---|
| 208 | writeFile.close() |
---|