"""Loaders for the string-benchmark result files.

Each loader reads one delimited text file located under
``../benchmarks/string/`` (relative to this module), assigns column names
and dtypes, and expands the ``test`` and ``corpus`` name columns into their
component fields via :func:`parseTestCorpus`.
"""

import os
import sys

import numpy as np
import pandas as pd

# Appended verbatim to this module's directory to locate the input files.
_BENCHMARK_SUBDIR = '/../benchmarks/string/'

# Fixed measurement columns of a timing file, in file order, with dtypes.
_TIMING_FACT_DTYPES = {
    'concatsPerReset': 'Int64',  # allows missing; https://stackoverflow.com/a/70626154
    'corpusItemCountAct': np.int64,
    'corpusMeanLenCharsAct': np.float64,
    'concatDoneActualCount': np.int64,
    'execTimeActualSec': np.float64,
}

# Fixed measurement columns of a sizing file, in file order; all are int64.
# NOTE: 'requsted_mem(B)' is spelled exactly as in the original column list;
# it is a runtime column name, so it must not be "corrected" here.
_SIZING_FACT_COLUMNS = [
    'ppid', 'pid',
    'malloc_count', 'free_count', 'calloc_count', 'realloc_count',
    'requsted_mem(B)', 'current_req_mem(B)', 'hw_cur_req_mem(B)',
    'text', 'heap', 'mmap_so', 'mmap', 'stack',
    'vvar', 'vdso', 'vsyscall', 'unfigured',
    'total_dynamic', 'epoch_timestamp(ms)',
]

# All columns of an attribution file, in file order, with dtypes.
_ATTRIB_DTYPES = {
    "test": str,
    "corpus": str,
    "expansion": np.float64,
    "category": str,
    "samples_in_category": np.int64,
    "total_samples": np.int64,
    "fraction": np.float64,
    "sources": str,
}


def _benchmarkInputPath(infileLocal):
    """Return the full path of *infileLocal* in the benchmark data directory."""
    # Plain concatenation (rather than os.path.join) keeps the resulting path
    # string exactly as it has always been built.
    return os.path.dirname(os.path.abspath(__file__)) + _BENCHMARK_SUBDIR + infileLocal


def _normalizeExtras(xClasNames, xClasDtypes, xFactNames, xFactDtypes):
    """Replace ``None`` extras with fresh empty containers.

    Returns the four values as ``(list, dict, list, dict)`` so that callers
    can concatenate / unpack them unconditionally.
    """
    return (
        [] if xClasNames is None else list(xClasNames),
        {} if xClasDtypes is None else dict(xClasDtypes),
        [] if xFactNames is None else list(xFactNames),
        {} if xFactDtypes is None else dict(xFactDtypes),
    )


def parseTestCorpus(dt: pd.DataFrame) -> None:
    """Expand the ``test`` and ``corpus`` columns of *dt* into component columns.

    *dt* is modified in place; nothing is returned.

    Expected name formats (whitespace around each value is stripped first):

    * ``test``: six ``-``-separated parts, stored as ``test-slug``,
      ``sut-platform``, ``operation``, ``sut-cfa-level``, ``sut-cfa-sharing``
      and ``op-alloc``.  ``sut`` is then built by re-joining every part
      except ``test-slug`` and ``operation`` with ``-``.
    * ``corpus``: ``<basename>.<ext>``, split at the LAST ``.`` into
      ``corpus-basename`` and ``corpus-ext``.  The basename has four
      ``-``-separated parts (``corpus-slug``, ``corpus-nstrs-tgt``,
      ``corpus-meanlen-tgt``, ``corpus-runid``) and the run id has three
      ``+``-separated parts (``corpus-relscale``, ``corpus-seed``,
      ``corpus-offset-instr``).

    ``corpus-nstrs-tgt``, ``corpus-meanlen-tgt`` and ``corpus-relscale`` are
    converted with ``pd.to_numeric``; the other derived columns stay strings.

    A name that does not split into the expected number of parts makes the
    multi-column assignment fail inside pandas.
    """
    testParts = dt['test'].str.strip().str.split('-', expand=True)
    dt[['test-slug', 'sut-platform', 'operation',
        'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc']] = testParts
    dt['sut'] = dt[['sut-platform', 'sut-cfa-level',
                    'sut-cfa-sharing', 'op-alloc']].agg('-'.join, axis=1)

    # rsplit with n=1: only the last '.' separates the extension, so dots
    # earlier in the name (e.g. inside a decimal number) stay in the basename.
    dt[['corpus-basename', 'corpus-ext']] = (
        dt['corpus'].str.strip().str.rsplit('.', n=1, expand=True))

    dt[['corpus-slug', 'corpus-nstrs-tgt',
        'corpus-meanlen-tgt', 'corpus-runid']] = (
        dt['corpus-basename'].str.strip().str.split('-', expand=True))
    dt["corpus-nstrs-tgt"] = pd.to_numeric(dt["corpus-nstrs-tgt"])
    dt["corpus-meanlen-tgt"] = pd.to_numeric(dt["corpus-meanlen-tgt"])

    dt[['corpus-relscale', 'corpus-seed', 'corpus-offset-instr']] = (
        dt['corpus-runid'].str.strip().str.split('+', expand=True))
    dt["corpus-relscale"] = pd.to_numeric(dt["corpus-relscale"])


def loadParseTimingData(infileLocal: str, xClasNames=None, xClasDtypes=None,
                        xFactNames=None, xFactDtypes=None) -> pd.DataFrame:
    """Load a comma-separated timing file and derive per-operation durations.

    Column order in the file is: ``test``, ``corpus``, the *xClasNames*
    columns, the fixed timing columns (``concatsPerReset``,
    ``corpusItemCountAct``, ``corpusMeanLenCharsAct``,
    ``concatDoneActualCount``, ``execTimeActualSec``), then the *xFactNames*
    columns.  The literal ``xxx`` is read as a missing value.

    Args:
        infileLocal: File name relative to the benchmark data directory.
        xClasNames: Names of extra columns located after ``corpus``
            (default: none).
        xClasDtypes: Mapping of dtypes for the *xClasNames* columns.
        xFactNames: Names of extra columns located after the fixed timing
            columns (default: none).
        xFactDtypes: Mapping of dtypes for the *xFactNames* columns.

    Returns:
        The loaded frame, with the columns added by :func:`parseTestCorpus`
        plus ``op-duration-s`` (``execTimeActualSec`` divided by
        ``concatDoneActualCount``) and ``op-duration-ns``.
    """
    xClasNames, xClasDtypes, xFactNames, xFactDtypes = _normalizeExtras(
        xClasNames, xClasDtypes, xFactNames, xFactDtypes)

    timings = pd.read_csv(
        _benchmarkInputPath(infileLocal),
        names=(['test', 'corpus'] + xClasNames
               + list(_TIMING_FACT_DTYPES) + xFactNames),
        # Built-in entries come last so they win over caller-supplied dtypes.
        dtype={
            **xClasDtypes,
            **xFactDtypes,
            'test': str,
            'corpus': str,
            **_TIMING_FACT_DTYPES,
            # 'Q' is not one of the fixed columns; presumably it is supplied
            # through the extra column names by some callers (to confirm).
            'Q': np.int64,
        },
        na_values=['xxx'],
    )

    # project: parse executable and corpus names
    parseTestCorpus(timings)

    # project: calculate fact
    timings['op-duration-s'] = (
        timings['execTimeActualSec'] / timings['concatDoneActualCount'])
    timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000

    return timings


def loadParseSizingData(infileLocal: str, xClasNames=None, xClasDtypes=None,
                        xFactNames=None, xFactDtypes=None) -> pd.DataFrame:
    """Load a space-separated sizing file.

    Column order in the file is: ``test``, ``corpus``, the *xClasNames*
    columns, the fixed sizing columns (``ppid`` through
    ``epoch_timestamp(ms)``, all read as int64), then the *xFactNames*
    columns.

    Args:
        infileLocal: File name relative to the benchmark data directory.
        xClasNames: Names of extra columns located after ``corpus``
            (default: none).
        xClasDtypes: Mapping of dtypes for the *xClasNames* columns.
        xFactNames: Names of extra columns located after the fixed sizing
            columns (default: none).
        xFactDtypes: Mapping of dtypes for the *xFactNames* columns.

    Returns:
        The loaded frame, with the columns added by :func:`parseTestCorpus`.
    """
    xClasNames, xClasDtypes, xFactNames, xFactDtypes = _normalizeExtras(
        xClasNames, xClasDtypes, xFactNames, xFactDtypes)

    sizings = pd.read_csv(
        _benchmarkInputPath(infileLocal),
        sep=' ',
        names=(['test', 'corpus'] + xClasNames
               + _SIZING_FACT_COLUMNS + xFactNames),
        # Built-in entries come last so they win over caller-supplied dtypes.
        dtype={
            **xClasDtypes,
            **xFactDtypes,
            'test': str,
            'corpus': str,
            **{column: np.int64 for column in _SIZING_FACT_COLUMNS},
        },
    )
    parseTestCorpus(sizings)
    return sizings


def loadParseAttribData(infileLocal: str) -> pd.DataFrame:
    """Load a space-separated attribution file.

    The file's columns are, in order: ``test``, ``corpus``, ``expansion``,
    ``category``, ``samples_in_category``, ``total_samples``, ``fraction``
    and ``sources``.

    Args:
        infileLocal: File name relative to the benchmark data directory.

    Returns:
        The loaded frame, with the columns added by :func:`parseTestCorpus`.
    """
    attribs = pd.read_csv(
        _benchmarkInputPath(infileLocal),
        sep=' ',
        names=list(_ATTRIB_DTYPES),
        dtype=dict(_ATTRIB_DTYPES),
    )
    parseTestCorpus(attribs)
    return attribs