source: benchmark/convoy/genConvoyStats.py @ df56e25

Last change on this file since df56e25 was ebbe941, checked in by caparsons <caparson@…>, 2 years ago

changed algorithm to approximation since max cycle finding is NP-hard

  • Property mode set to 100644
File size: 8.2 KB
Line 
1#!/usr/bin/env python3
2
3import os
4import sys
5import math
6import argparse
7import statistics as st
8
9# Parsing Logic
10parser = argparse.ArgumentParser(prog = 'GenConvoyStats',
11    description = 'Analyzes handoff matrix output and uses Markov chain modelling to provide upper and lower bounds on the largest long term convoy',
12    epilog = '')
13
14parser.add_argument("Filename")
15parser.add_argument("-r", "--RunsPerNumThds", default=5, help="Number of trials per # of threads. Default is 5")
16parser.add_argument("-m", "--MaxThreads", default=32, help="Maximum number of threads. Default is 32")
17parser.add_argument("-o", "--OutputFile", default='', help="File to write output to")
18parser.add_argument("-v", "--Verbose", help="Verbose output. Will print per run stats alongside aggregates", action='count', default=0)
19parser.add_argument("-s", "--Single", help="Indicates that input only contains a single run. Assumes the number of threads is the value passed to -m", action='count', default=0)
20
21args = parser.parse_args()
22
23# handoff data
24handoff = []
25sumPerRow = [] 
26handoffTotal = 0
27maxCycleSum = 0
28maxCycle = []
29
30# per thread data (across runs)
31minBound = []
32maxBound = []
33expected = []
34
35# input file descriptor
36readFile = open(args.Filename, "r")
37
38writeFile = False
39if args.OutputFile != '':
40    writeFile = open(args.Filename, "w")
41
42def reset():
43    global handoff, sumPerRow, handoffTotal, readFile, maxCycleSum, maxCycle
44    handoff.clear()
45    sumPerRow.clear()
46    handoffTotal = 0
47    maxCycleSum = 0
48    maxCycle.clear()
49
50def thdReset():
51    global minBound, maxBound, expected
52    minBound.clear()
53    maxBound.clear()
54    expected.clear()
55
56def output(string):
57    global writeFile
58    if writeFile:
59        writeFile.write(string + '\n')
60   
61    print(string)
62
63# reads in handoff matrix for a single run and accumulates row/matrix total at the same time
64def readInMatrix(currThds):
65    global handoff, sumPerRow, handoffTotal, readFile
66    for i in range(currThds):
67        line = readFile.readline()
68
69        # Deal with EOF
70        if not line:
71            print("Incorrect arguments or file format: error in readInMatrix")
72            sys.exit(1)
73
74        # deal with any empty lines
75        while line == '\n':
76            line = readFile.readline()
77
78        row = []
79        rowSum = 0
80
81        # convert row into list of ints and accumulate
82        for val in line.replace(',','').split():
83            row.append(int(val))
84            rowSum += int(val)
85       
86        #store row in global state
87        handoff.append(row)
88        sumPerRow.append(rowSum)
89        handoffTotal += rowSum
90
91# moves current non empty line in readFile to line after line described by first two chars
92def goToLineByStartingChars(string):
93    global readFile
94    # find start of relevant data
95    line = ""
96    while True:
97        line = readFile.readline()
98        if not line:
99            print("Incorrect arguments or file format: error in goToLineByStartingChars")
100            sys.exit(1)
101       
102        # strip after checking for EOF so we can distinguish EOF vs empty line
103        line = line.strip()
104
105        # discard lines until we see the column line in output
106        if line and line[0:len(string)] == string:
107            break
108
109# recursively find largest cycle included a specific node using DFS
110def findLargestCycle(startNode, currNode, visited, globalVisited, currSum, currCycle):
111    global handoff, maxCycle, maxCycleSum
112
113    # print("CurrNode: " + str(currNode) + " StartNode: " + str(startNode))
114    # print(currCycle)
115    # if we visit a node from a previous call then return since we have already
116    #   looked at all cycles containing that node
117    if globalVisited[currNode]:
118        # print("globalVisited")
119        return
120
121    # found a cycle
122    if visited[currNode]:
123        # if currNode == startNode:
124        # print("LocalVisited, curr: " + str(currSum) + ", max: " + str(maxCycleSum) + ", Start: " + str(startNode) )
125        # if the cycle contains the start node check if it is our new max cycle
126        if currNode == startNode and currSum > maxCycleSum:
127            # print("NewMax")
128            maxCycleSum = currSum
129            maxCycle = currCycle.copy()
130        return
131
132    visited[currNode] = True
133    # print(visited)
134    for idx, val in enumerate(handoff[currNode]):
135        # continue if no edge
136        # if val == 0:
137        #     continue
138
139        currCycle.append(currNode)
140        findLargestCycle(startNode, idx, visited, globalVisited, currSum + val, currCycle)
141        currCycle.pop()
142    # print(currNode)
143    visited[currNode] = False
144
145
146
147def analyzeRun():
148    global handoff, sumPerRow, handoffTotal, maxCycle, maxCycleSum, minBound, maxBound, expected
149    currThds = len(handoff)
150   
151
152    # This is NP-Hard and is currently not tractable so we just estimate the largest cycle
153    # the code to do so is commented below
154
155    # find largest cycle
156    # globalVisited = [False] * currThds
157    # for i in range(currThds):
158    #     # print(i)
159    #     visited = [False] * currThds
160    #     findLargestCycle(i, i, visited, globalVisited, 0, [])
161    #     globalVisited[i] = True
162
163    # # calculate stats
164    # cycleHandoffs = []
165    # cycleHandoffs.append(handoff[maxCycle[-1]][maxCycle[0]])
166    # sumOfMaxCycleRows = sumPerRow[maxCycle[0]]
167
168    # # expected handoff is MULT P( handoff ) for each handoff in max cycle
169    # expectedConvoy = handoff[maxCycle[-1]][maxCycle[0]] / sumPerRow[maxCycle[-1]]
170    # for idx, val in enumerate(maxCycle[1:]):
171    #     cycleHandoffs.append(handoff[maxCycle[idx]][val])
172    #     sumOfMaxCycleRows += sumPerRow[val]
173    #     expectedConvoy = expectedConvoy * handoff[maxCycle[idx]][val] / sumPerRow[maxCycle[idx]]
174
175    # # adjust expected bound
176    # # if max cycle contains all nodes, sumOfMaxCycleRows / handoffTotal == 1
177    # # else this adjusts the expected bound to compensate for the cycle not visiting all nodes
178    # # also mult by 100 to turn into percentage from decimal
179    # expectedConvoy = expectedConvoy * sumOfMaxCycleRows * 100 / handoffTotal
180
181    ################################
182
183    #start of approximation code is here:
184
185    # instead we take the maximum handoff from each row and assume it is all a cycle as an approximation
186    maxCycle = []
187    maxCycleSum = 0
188    cycleHandoffs = []
189    expectedConvoy = 100
190    for i in range(currThds):
191        currMax = max(handoff[i])
192        maxCycle.append(i)
193        cycleHandoffs.append(currMax)
194        maxCycleSum += currMax
195        expectedConvoy = expectedConvoy * currMax / sumPerRow[i]
196
197    sumOfMaxCycleRows = handoffTotal
198
199    # end of approximation code
200    ###################################################
201
202    # upper bound is the percentage of all handoffs that occur in the maximum possible cycle
203    # The cycle is possible min(cycleHandoffs) number of times
204    maxFeasibleHandoff = min(cycleHandoffs)
205    upperBoundConvoy = maxFeasibleHandoff * len(maxCycle) * 100 / handoffTotal
206    lowerBoundConvoy = 1
207    for val in maxCycle:
208        lowerBoundConvoy = lowerBoundConvoy * maxFeasibleHandoff / sumPerRow[val]
209
210    # adjust lower bound. See comment for expectedConvoy adjustment to explain why
211    lowerBoundConvoy = lowerBoundConvoy * sumOfMaxCycleRows * 100 / handoffTotal
212
213    maxBound.append(upperBoundConvoy)
214    minBound.append(lowerBoundConvoy)
215    expected.append(expectedConvoy)
216
217    if args.Verbose or args.Single:
218        output('Convoying bounds: {:.2f}%-{:.2f}%, Expected convoying: {:.2f}%'.format(lowerBoundConvoy,upperBoundConvoy, expectedConvoy))
219
220
221if args.Single:
222    output("N: " + str(args.MaxThreads))
223    goToLineByStartingChars(str(args.MaxThreads)+' ')
224    readInMatrix(int(args.MaxThreads))
225    analyzeRun()
226    reset()
227else:
228    for i in range(args.MaxThreads):
229        output("N: " + str(i+1))
230
231        goToLineByStartingChars(str(i+1)+' ')
232        for j in range(args.RunsPerNumThds):
233            readInMatrix(i+1)
234            analyzeRun()
235            reset()
236
237        output('Mean convoying bounds: {:.2f}%-{:.2f}%, Mean expected convoying: {:.2f}%'.format(st.mean(minBound), st.mean(maxBound),st.mean(expected)))
238        output('Median convoying bounds: {:.2f}%-{:.2f}%, Median expected convoying: {:.2f}%'.format(st.median(minBound), st.median(maxBound),st.median(expected)))
239        output('')
240        thdReset()
241
242readFile.close()
243
244if writeFile:
245    writeFile.close()
Note: See TracBrowser for help on using the repository browser.