| [8e64cb4] | 1 | #!/usr/bin/env python3
 | 
|---|
 | 2 | 
 | 
|---|
| [93e0603] | 3 | import os
 | 
|---|
 | 4 | import sys
 | 
|---|
 | 5 | import math
 | 
|---|
 | 6 | import argparse
 | 
|---|
 | 7 | import statistics as st
 | 
|---|
 | 8 | 
 | 
|---|
 | 9 | # Parsing Logic
 | 
|---|
 | 10 | parser = argparse.ArgumentParser(prog = 'GenConvoyStats',
 | 
|---|
 | 11 |     description = 'Analyzes handoff matrix output and uses Markov chain modelling to provide upper and lower bounds on the largest long term convoy',
 | 
|---|
 | 12 |     epilog = '')
 | 
|---|
 | 13 | 
 | 
|---|
 | 14 | parser.add_argument("Filename")
 | 
|---|
| [fd6e3a4] | 15 | parser.add_argument("-r", "--RunsPerNumThds", default=5, help="Number of trials per # of threads. Default is 5")
 | 
|---|
| [93e0603] | 16 | parser.add_argument("-m", "--MaxThreads", default=32, help="Maximum number of threads. Default is 32")
 | 
|---|
 | 17 | parser.add_argument("-o", "--OutputFile", default='', help="File to write output to")
 | 
|---|
 | 18 | parser.add_argument("-v", "--Verbose", help="Verbose output. Will print per run stats alongside aggregates", action='count', default=0)
 | 
|---|
| [8e64cb4] | 19 | parser.add_argument("-s", "--Single", help="Indicates that input only contains a single run. Assumes the number of threads is the value passed to -m", action='count', default=0)
 | 
|---|
| [93e0603] | 20 | 
 | 
|---|
 | 21 | args = parser.parse_args()
 | 
|---|
 | 22 | 
 | 
|---|
 | 23 | # handoff data
 | 
|---|
 | 24 | handoff = []
 | 
|---|
 | 25 | sumPerRow = [] 
 | 
|---|
 | 26 | handoffTotal = 0
 | 
|---|
 | 27 | maxCycleSum = 0
 | 
|---|
 | 28 | maxCycle = []
 | 
|---|
 | 29 | 
 | 
|---|
 | 30 | # per thread data (across runs)
 | 
|---|
 | 31 | minBound = []
 | 
|---|
 | 32 | maxBound = []
 | 
|---|
 | 33 | expected = []
 | 
|---|
 | 34 | 
 | 
|---|
 | 35 | # input file descriptor
 | 
|---|
 | 36 | readFile = open(args.Filename, "r")
 | 
|---|
 | 37 | 
 | 
|---|
 | 38 | writeFile = False
 | 
|---|
 | 39 | if args.OutputFile != '':
 | 
|---|
 | 40 |     writeFile = open(args.Filename, "w")
 | 
|---|
 | 41 | 
 | 
|---|
 | 42 | def reset():
 | 
|---|
 | 43 |     global handoff, sumPerRow, handoffTotal, readFile, maxCycleSum, maxCycle
 | 
|---|
 | 44 |     handoff.clear()
 | 
|---|
 | 45 |     sumPerRow.clear()
 | 
|---|
 | 46 |     handoffTotal = 0
 | 
|---|
 | 47 |     maxCycleSum = 0
 | 
|---|
 | 48 |     maxCycle.clear()
 | 
|---|
 | 49 | 
 | 
|---|
 | 50 | def thdReset():
 | 
|---|
 | 51 |     global minBound, maxBound, expected
 | 
|---|
 | 52 |     minBound.clear()
 | 
|---|
 | 53 |     maxBound.clear()
 | 
|---|
 | 54 |     expected.clear()
 | 
|---|
 | 55 | 
 | 
|---|
 | 56 | def output(string):
 | 
|---|
 | 57 |     global writeFile
 | 
|---|
 | 58 |     if writeFile:
 | 
|---|
 | 59 |         writeFile.write(string + '\n')
 | 
|---|
 | 60 |     
 | 
|---|
 | 61 |     print(string)
 | 
|---|
 | 62 | 
 | 
|---|
 | 63 | # reads in handoff matrix for a single run and accumulates row/matrix total at the same time
 | 
|---|
 | 64 | def readInMatrix(currThds):
 | 
|---|
 | 65 |     global handoff, sumPerRow, handoffTotal, readFile
 | 
|---|
 | 66 |     for i in range(currThds):
 | 
|---|
 | 67 |         line = readFile.readline()
 | 
|---|
 | 68 | 
 | 
|---|
 | 69 |         # Deal with EOF
 | 
|---|
 | 70 |         if not line:
 | 
|---|
 | 71 |             print("Incorrect arguments or file format: error in readInMatrix")
 | 
|---|
 | 72 |             sys.exit(1)
 | 
|---|
 | 73 | 
 | 
|---|
 | 74 |         # deal with any empty lines
 | 
|---|
 | 75 |         while line == '\n':
 | 
|---|
 | 76 |             line = readFile.readline()
 | 
|---|
 | 77 | 
 | 
|---|
 | 78 |         row = []
 | 
|---|
 | 79 |         rowSum = 0
 | 
|---|
 | 80 | 
 | 
|---|
 | 81 |         # convert row into list of ints and accumulate
 | 
|---|
 | 82 |         for val in line.replace(',','').split():
 | 
|---|
 | 83 |             row.append(int(val))
 | 
|---|
 | 84 |             rowSum += int(val)
 | 
|---|
 | 85 |         
 | 
|---|
 | 86 |         #store row in global state
 | 
|---|
 | 87 |         handoff.append(row)
 | 
|---|
 | 88 |         sumPerRow.append(rowSum)
 | 
|---|
 | 89 |         handoffTotal += rowSum
 | 
|---|
 | 90 | 
 | 
|---|
 | 91 | # moves current non empty line in readFile to line after line described by first two chars
 | 
|---|
 | 92 | def goToLineByStartingChars(string):
 | 
|---|
 | 93 |     global readFile
 | 
|---|
 | 94 |     # find start of relevant data
 | 
|---|
 | 95 |     line = ""
 | 
|---|
 | 96 |     while True:
 | 
|---|
 | 97 |         line = readFile.readline()
 | 
|---|
 | 98 |         if not line:
 | 
|---|
 | 99 |             print("Incorrect arguments or file format: error in goToLineByStartingChars")
 | 
|---|
 | 100 |             sys.exit(1)
 | 
|---|
 | 101 |         
 | 
|---|
 | 102 |         # strip after checking for EOF so we can distinguish EOF vs empty line
 | 
|---|
 | 103 |         line = line.strip()
 | 
|---|
 | 104 | 
 | 
|---|
 | 105 |         # discard lines until we see the column line in output
 | 
|---|
 | 106 |         if line and line[0:len(string)] == string:
 | 
|---|
 | 107 |             break
 | 
|---|
 | 108 | 
 | 
|---|
 | 109 | # recursively find largest cycle included a specific node using DFS
 | 
|---|
 | 110 | def findLargestCycle(startNode, currNode, visited, globalVisited, currSum, currCycle):
 | 
|---|
 | 111 |     global handoff, maxCycle, maxCycleSum
 | 
|---|
 | 112 | 
 | 
|---|
| [ebbe941] | 113 |     # print("CurrNode: " + str(currNode) + " StartNode: " + str(startNode))
 | 
|---|
| [93e0603] | 114 |     # print(currCycle)
 | 
|---|
 | 115 |     # if we visit a node from a previous call then return since we have already
 | 
|---|
 | 116 |     #   looked at all cycles containing that node
 | 
|---|
 | 117 |     if globalVisited[currNode]:
 | 
|---|
 | 118 |         # print("globalVisited")
 | 
|---|
 | 119 |         return
 | 
|---|
 | 120 | 
 | 
|---|
 | 121 |     # found a cycle
 | 
|---|
 | 122 |     if visited[currNode]:
 | 
|---|
| [ebbe941] | 123 |         # if currNode == startNode:
 | 
|---|
| [93e0603] | 124 |         # print("LocalVisited, curr: " + str(currSum) + ", max: " + str(maxCycleSum) + ", Start: " + str(startNode) )
 | 
|---|
 | 125 |         # if the cycle contains the start node check if it is our new max cycle
 | 
|---|
 | 126 |         if currNode == startNode and currSum > maxCycleSum:
 | 
|---|
 | 127 |             # print("NewMax")
 | 
|---|
 | 128 |             maxCycleSum = currSum
 | 
|---|
 | 129 |             maxCycle = currCycle.copy()
 | 
|---|
 | 130 |         return
 | 
|---|
 | 131 | 
 | 
|---|
 | 132 |     visited[currNode] = True
 | 
|---|
| [ebbe941] | 133 |     # print(visited)
 | 
|---|
| [93e0603] | 134 |     for idx, val in enumerate(handoff[currNode]):
 | 
|---|
 | 135 |         # continue if no edge
 | 
|---|
| [ebbe941] | 136 |         # if val == 0:
 | 
|---|
 | 137 |         #     continue
 | 
|---|
| [93e0603] | 138 | 
 | 
|---|
 | 139 |         currCycle.append(currNode)
 | 
|---|
 | 140 |         findLargestCycle(startNode, idx, visited, globalVisited, currSum + val, currCycle)
 | 
|---|
 | 141 |         currCycle.pop()
 | 
|---|
| [ebbe941] | 142 |     # print(currNode)
 | 
|---|
| [93e0603] | 143 |     visited[currNode] = False
 | 
|---|
 | 144 | 
 | 
|---|
 | 145 | 
 | 
|---|
 | 146 | 
 | 
|---|
 | 147 | def analyzeRun():
 | 
|---|
 | 148 |     global handoff, sumPerRow, handoffTotal, maxCycle, maxCycleSum, minBound, maxBound, expected
 | 
|---|
 | 149 |     currThds = len(handoff)
 | 
|---|
 | 150 |     
 | 
|---|
| [ebbe941] | 151 | 
 | 
|---|
 | 152 |     # This is NP-Hard and is currently not tractable so we just estimate the largest cycle
 | 
|---|
 | 153 |     # the code to do so is commented below
 | 
|---|
 | 154 | 
 | 
|---|
 | 155 |     # find largest cycle
 | 
|---|
 | 156 |     # globalVisited = [False] * currThds
 | 
|---|
 | 157 |     # for i in range(currThds):
 | 
|---|
 | 158 |     #     # print(i)
 | 
|---|
 | 159 |     #     visited = [False] * currThds
 | 
|---|
 | 160 |     #     findLargestCycle(i, i, visited, globalVisited, 0, [])
 | 
|---|
 | 161 |     #     globalVisited[i] = True
 | 
|---|
 | 162 | 
 | 
|---|
 | 163 |     # # calculate stats
 | 
|---|
 | 164 |     # cycleHandoffs = []
 | 
|---|
 | 165 |     # cycleHandoffs.append(handoff[maxCycle[-1]][maxCycle[0]])
 | 
|---|
 | 166 |     # sumOfMaxCycleRows = sumPerRow[maxCycle[0]]
 | 
|---|
 | 167 | 
 | 
|---|
 | 168 |     # # expected handoff is MULT P( handoff ) for each handoff in max cycle
 | 
|---|
 | 169 |     # expectedConvoy = handoff[maxCycle[-1]][maxCycle[0]] / sumPerRow[maxCycle[-1]]
 | 
|---|
 | 170 |     # for idx, val in enumerate(maxCycle[1:]):
 | 
|---|
 | 171 |     #     cycleHandoffs.append(handoff[maxCycle[idx]][val])
 | 
|---|
 | 172 |     #     sumOfMaxCycleRows += sumPerRow[val]
 | 
|---|
 | 173 |     #     expectedConvoy = expectedConvoy * handoff[maxCycle[idx]][val] / sumPerRow[maxCycle[idx]]
 | 
|---|
 | 174 | 
 | 
|---|
 | 175 |     # # adjust expected bound
 | 
|---|
 | 176 |     # # if max cycle contains all nodes, sumOfMaxCycleRows / handoffTotal == 1
 | 
|---|
 | 177 |     # # else this adjusts the expected bound to compensate for the cycle not visiting all nodes
 | 
|---|
 | 178 |     # # also mult by 100 to turn into percentage from decimal
 | 
|---|
 | 179 |     # expectedConvoy = expectedConvoy * sumOfMaxCycleRows * 100 / handoffTotal
 | 
|---|
 | 180 | 
 | 
|---|
 | 181 |     ################################
 | 
|---|
 | 182 | 
 | 
|---|
 | 183 |     #start of approximation code is here:
 | 
|---|
 | 184 | 
 | 
|---|
 | 185 |     # instead we take the maximum handoff from each row and assume it is all a cycle as an approximation
 | 
|---|
 | 186 |     maxCycle = []
 | 
|---|
 | 187 |     maxCycleSum = 0
 | 
|---|
| [93e0603] | 188 |     cycleHandoffs = []
 | 
|---|
| [ebbe941] | 189 |     expectedConvoy = 100
 | 
|---|
 | 190 |     for i in range(currThds):
 | 
|---|
 | 191 |         currMax = max(handoff[i])
 | 
|---|
 | 192 |         maxCycle.append(i)
 | 
|---|
 | 193 |         cycleHandoffs.append(currMax)
 | 
|---|
 | 194 |         maxCycleSum += currMax
 | 
|---|
 | 195 |         expectedConvoy = expectedConvoy * currMax / sumPerRow[i]
 | 
|---|
 | 196 | 
 | 
|---|
 | 197 |     sumOfMaxCycleRows = handoffTotal
 | 
|---|
 | 198 | 
 | 
|---|
 | 199 |     # end of approximation code
 | 
|---|
 | 200 |     ###################################################
 | 
|---|
| [93e0603] | 201 | 
 | 
|---|
 | 202 |     # upper bound is the percentage of all handoffs that occur in the maximum possible cycle
 | 
|---|
 | 203 |     # The cycle is possible min(cycleHandoffs) number of times
 | 
|---|
 | 204 |     maxFeasibleHandoff = min(cycleHandoffs)
 | 
|---|
 | 205 |     upperBoundConvoy = maxFeasibleHandoff * len(maxCycle) * 100 / handoffTotal
 | 
|---|
 | 206 |     lowerBoundConvoy = 1
 | 
|---|
 | 207 |     for val in maxCycle:
 | 
|---|
 | 208 |         lowerBoundConvoy = lowerBoundConvoy * maxFeasibleHandoff / sumPerRow[val]
 | 
|---|
 | 209 | 
 | 
|---|
 | 210 |     # adjust lower bound. See comment for expectedConvoy adjustment to explain why
 | 
|---|
 | 211 |     lowerBoundConvoy = lowerBoundConvoy * sumOfMaxCycleRows * 100 / handoffTotal
 | 
|---|
 | 212 | 
 | 
|---|
 | 213 |     maxBound.append(upperBoundConvoy)
 | 
|---|
 | 214 |     minBound.append(lowerBoundConvoy)
 | 
|---|
 | 215 |     expected.append(expectedConvoy)
 | 
|---|
 | 216 | 
 | 
|---|
| [8e64cb4] | 217 |     if args.Verbose or args.Single:
 | 
|---|
| [93e0603] | 218 |         output('Convoying bounds: {:.2f}%-{:.2f}%, Expected convoying: {:.2f}%'.format(lowerBoundConvoy,upperBoundConvoy, expectedConvoy))
 | 
|---|
 | 219 | 
 | 
|---|
 | 220 | 
 | 
|---|
| [8e64cb4] | 221 | if args.Single:
 | 
|---|
 | 222 |     output("N: " + str(args.MaxThreads))
 | 
|---|
 | 223 |     goToLineByStartingChars(str(args.MaxThreads)+' ')
 | 
|---|
 | 224 |     readInMatrix(int(args.MaxThreads))
 | 
|---|
 | 225 |     analyzeRun()
 | 
|---|
 | 226 |     reset()
 | 
|---|
 | 227 | else:
 | 
|---|
 | 228 |     for i in range(args.MaxThreads):
 | 
|---|
 | 229 |         output("N: " + str(i+1))
 | 
|---|
 | 230 | 
 | 
|---|
 | 231 |         goToLineByStartingChars(str(i+1)+' ')
 | 
|---|
 | 232 |         for j in range(args.RunsPerNumThds):
 | 
|---|
 | 233 |             readInMatrix(i+1)
 | 
|---|
 | 234 |             analyzeRun()
 | 
|---|
 | 235 |             reset()
 | 
|---|
 | 236 | 
 | 
|---|
 | 237 |         output('Mean convoying bounds: {:.2f}%-{:.2f}%, Mean expected convoying: {:.2f}%'.format(st.mean(minBound), st.mean(maxBound),st.mean(expected)))
 | 
|---|
 | 238 |         output('Median convoying bounds: {:.2f}%-{:.2f}%, Median expected convoying: {:.2f}%'.format(st.median(minBound), st.median(maxBound),st.median(expected)))
 | 
|---|
 | 239 |         output('')
 | 
|---|
 | 240 |         thdReset()
 | 
|---|
| [93e0603] | 241 | 
 | 
|---|
 | 242 | readFile.close()
 | 
|---|
 | 243 | 
 | 
|---|
 | 244 | if writeFile:
 | 
|---|
 | 245 |     writeFile.close()
 | 
|---|