Classes | |
class | Experiment |
class | IniParser |
class | BaseWorker |
class | RemoveOversimFiles |
class | GenerateIniFiles |
class | CollectStatistics |
Functions | |
def | __init__ |
def | generate |
def | __init__ |
def | parse |
def | remove |
def | delete |
def | generate |
def | delete |
def | processSca |
def | processVec |
def | printScaStats |
def | printVecStats |
def | percentile |
def | numeric_compare |
def | IsInt |
def | non_exp_repr |
def | makeScaDat |
Variables | |
tuple | reSemi = re.compile(r";") |
tuple | reLine = re.compile(r"(\S*)\s*=\s*(.*?)[\r\n]") |
tuple | reName = re.compile(r"^\s*\**\.*([\w\.]*)") |
tuple | reValue = re.compile(r"^\s*\"?([\w\.]*)\"?") |
tuple | reComb = re.compile(r"\$\$") |
tuple | reRuns = re.compile(r"^\s*runs\s*=\s*(\d+)") |
tuple | reVec = re.compile(r"^vector") |
tuple | reVecName = re.compile(r'(\d*) "(.*)" (".*") (\d*)\n$') |
tuple | reVecLine = re.compile(r"^([0-9]*)\t([0-9.]*)\t([0-9.]*)\n$") |
tuple | reRemDigits = re.compile(r"\[[0-9]*\]") |
tuple | reScaRun = re.compile(r"^run|^\s*$") |
tuple | reScaLine = re.compile(r'scalar\s*("[^"]*")\s*("[^"]*")\s*(-?[\d.]*e?-?\d*)') |
tuple | parser = optparse.OptionParser() |
Start of program. | |
iniFile = options.iniFile | |
tuple | experiments = IniParser(iniFile) |
name | |
fileNames | |
lines | |
parameterNames | |
parameterValues | |
runs | |
currentLines | |
allLines | |
globalVecBaskets | |
globalScaStat | |
currVecBaskets | |
currScaStat |
def generate::__init__ | ( | self, | ||
iniFile | ||||
) |
def generate::__init__ | ( | self, | ||
expLines, | ||||
expName | ||||
) |
def __init__(self, expLines, expName):
    """Set up one experiment and expand its parameter combinations.

    expLines -- list of ini-file lines that belong to this experiment
    expName  -- name of the experiment; also the base of every generated
                file name

    After construction the instance exposes name, fileNames, lines,
    parameterNames, parameterValues and runs. The scratch attributes
    currentLines and allLines exist only while generate() is running.
    """
    # result containers, filled in by generate()
    self.name = expName             # name of the experiment
    self.fileNames = []             # list of file names
    self.lines = []                 # list of lists of lines for ini files
    self.parameterNames = []        # list of parameter names
    self.parameterValues = []       # list of lists of parameter values
    self.runs = 0                   # number of runs for the experiment

    # scratch state: a private copy that generate() rewrites per
    # combination, and the untouched original lines
    self.currentLines = expLines[:]
    self.allLines = expLines

    # parse the lines
    self.generate(0, expName)

    # runs is still 0 when generate() did not set it: use the default
    if self.runs == 0:
        self.runs = self.defaultRuns

    # cleanup
    del self.currentLines
    del self.allLines
def generate::delete | ( | self, | ||
experiments | ||||
) |
delete generated files
delete ini files
def delete(self, experiments):
    """Delete the generated .sca, .vec.dat and .sca.dat files.

    experiments -- iterable of experiment objects providing name,
                   fileNames, parameterNames and parameterValues

    Every path is handed to self.remove().
    """
    logging.info("deleting dat files")

    for exp in experiments:
        logging.info("-- " + exp.name)

        # per-experiment summary files
        self.remove(exp.name + ".sca")
        self.remove(exp.name + ".vec.dat")

        # per-combination summary files
        for fileName in exp.fileNames:
            logging.info("---- " + fileName)
            self.remove(fileName + ".sca")
            self.remove(fileName + ".vec.dat")

        # per-parameter files: named after the combination with the
        # "name=value" part of that parameter cut out
        for index, pName in enumerate(exp.parameterNames):
            for value in exp.parameterValues[index]:
                param = pName + "=" + value
                for fileName in exp.fileNames:
                    if param in fileName:
                        stripped = fileName.replace(param, "")
                        logging.info("---- " + stripped)
                        self.remove(stripped + ".sca.dat")

    logging.info("done")
def generate::delete | ( | ) |
delete ini files
def delete(self, experiments):
    """Delete the per-run .sca, .vec and .debug files of all experiments.

    experiments -- iterable of experiment objects providing name,
                   fileNames and runs

    NOTE(review): the extracted signature table lists this method without
    parameters, but the body uses both self and experiments; the header
    was reconstructed accordingly -- confirm against the original file.
    """
    logging.info("Deleting original sca and vec files")

    for exp in experiments:                 # loop through experiments
        logging.info("-- " + exp.name)

        for fileName in exp.fileNames:      # loop through parameter combinations
            for run in range(1, exp.runs + 1):      # runs are numbered from 1
                baseFileName = fileName + "-run=" + str(run)
                for suffix in (".sca", ".vec", ".debug"):
                    self.remove(baseFileName + suffix)

    logging.info("Done")
def generate::generate | ( | self, | ||
experiments | ||||
) |
generate ini files
collect statistics from sca and vec files
def generate(self, experiments):
    """Write one ini file per parameter combination and run.

    experiments -- iterable of experiment objects providing name,
                   fileNames, runs and lines (lines[i] holds the ini
                   lines that belong to fileNames[i])

    Each file is named "<fileName>-run=<n>.ini" and starts with a
    "[Run <n>]" section naming the scalar and vector output files.
    """
    logging.info("Generate ini files")

    for exp in experiments:                 # loop through experiments
        logging.info("-- " + exp.name)

        for index, fileName in enumerate(exp.fileNames):    # parameter combinations
            for run in range(1, exp.runs + 1):              # runs are numbered from 1
                baseFileName = fileName + "-run=" + str(run)
                logging.info("---- " + baseFileName)

                # "with" closes the file even if a write fails
                with open(baseFileName + ".ini", "w") as iniOut:
                    iniOut.write("[Run " + str(run) + "]\n")
                    iniOut.write("description = " + baseFileName + "\n")
                    iniOut.write("output-scalar-file = " + baseFileName + ".sca\n")
                    iniOut.write("output-vector-file = " + baseFileName + ".vec\n")
                    for line in exp.lines[index]:
                        iniOut.write(line)

    logging.info("Done")
def generate::generate | ( | self, | ||
index, | ||||
fileName | ||||
) |
parse the lines and generate parameter combinations
def generate(self, index, fileName):
    """Parse the lines recursively and generate parameter combinations.

    index    -- position in self.allLines of the line to process
    fileName -- file name built so far for the current combination

    Three kinds of line are treated specially:
      * a line containing ";" lists several values for one parameter;
        the recursion branches once per value and appends
        "-<parameter>=<value>" to the file name
      * a "runs = N" line sets self.runs (only the first one counts)
      * a line containing "$$" takes over the value of the line before it
    When all lines are consumed, the file name and a copy of the current
    lines are appended to self.fileNames and self.lines.
    """
    if index >= len(self.allLines):         # all lines processed
        self.fileNames.append(fileName)
        self.lines.append(self.currentLines[:])
        return

    line = self.allLines[index]

    if self.reSemi.search(line):            # semicolon found, parse parameters
        matches = self.reLine.search(line)
        name = matches.group(1)
        pName = self.reName.search(name).group(1)       # pretty name for output
        values = self.reSemi.split(matches.group(2))
        # pretty values for output (surrounding whitespace and quotes dropped)
        prettyValues = [self.reValue.search(value).group(1) for value in values]

        if pName not in self.parameterNames:    # add parameter to global parameter list
            self.parameterNames.append(pName)
            self.parameterValues.append(prettyValues[:])

        # branch: one recursion per value of this parameter
        for value, pValue in zip(values, prettyValues):
            self.currentLines[index] = name + "=" + value + "\n"
            self.generate(index + 1, fileName + "-" + pName + "=" + pValue)
        return

    runsMatch = self.reRuns.search(line)
    if runsMatch:                           # line contains number of runs
        if not self.runs:                   # keep the first value found
            self.runs = int(runsMatch.group(1))
    elif self.reComb.search(line):
        # "$$": reuse the value currently set on the previous line
        lastValue = self.reLine.search(self.currentLines[index - 1]).group(2)
        thisName = self.reLine.search(line).group(1)
        self.currentLines[index] = thisName + "=" + lastValue + "\n"

    # go into next recursion step
    self.generate(index + 1, fileName)
def generate::IsInt | ( | self, | ||
str | ||||
) |
Is the given string an integer?
def IsInt(self, str):
    """Is the given string an integer?

    Returns 1 when int(str) succeeds and 0 when it raises ValueError.
    Adapted from
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52660
    """
    try:
        int(str)
    except ValueError:
        return 0
    return 1
def generate::makeScaDat | ( | self, | ||
exp | ||||
) |
create sca dat files
def makeScaDat(self, exp):
    """Create the .sca.dat files of one experiment.

    exp -- experiment object providing parameterNames, parameterValues,
           fileNames and runs

    For every parameter, the combinations that differ only in that
    parameter are grouped into one file, named after the combination with
    the "name=value" part removed. Each data row holds the parameter value
    followed by mean, 10-quantile and 90-quantile of every scalar, taken
    over the runs that self.processSca() read for that combination.
    """
    import functools
    import numpy    # the array aliases in the scipy namespace are deprecated

    for index, pName in enumerate(exp.parameterNames):  # loop through parameters
        values = exp.parameterValues[index]

        collected = {}      # stripped file name -> {parameter value -> scalar stats}
        entryCount = 0      # scalars per file; -1 once a mismatch was reported

        for value in values:                            # loop through values
            param = pName + "=" + value                 # parameter-value string in fileName

            for fileName in exp.fileNames:              # search files that contain param
                if param not in fileName:
                    continue

                # process sca files
                self.currScaStat = {}
                self.globalScaStat = {}
                for run in range(1, exp.runs + 1):      # loop through number of runs
                    self.processSca(fileName + "-run=" + str(run))

                # check if sca files have different number of scalars (should not happen!)
                if entryCount == 0:
                    entryCount = len(self.currScaStat)
                elif entryCount >= 0 and entryCount != len(self.currScaStat):
                    logging.error("Your sca files don't contain the same number of entries. Maybe you are comparing different Protocols oder using different applications?")
                    entryCount = -1     # report the mismatch only once

                # save collected data
                stripped = fileName.replace(param, "")
                collected.setdefault(stripped, {})[value] = self.currScaStat.copy()

        # write sca.dat files
        for stripped in collected:
            byValue = collected[stripped]
            # cmp_to_key keeps the comparison-function sort working on
            # Python 3, where sorted() no longer accepts a cmp argument
            ordered = sorted(byValue, key=functools.cmp_to_key(self.numeric_compare))

            with open(stripped + ".sca.dat", "w") as datFile:
                for position, value in enumerate(ordered):
                    stats = byValue[value]
                    names = sorted(stats)

                    if position == 0:   # write header, based on the first row's scalars
                        header = "#\tcolumn\tscalar name\n"
                        header += "#\t1\t" + pName + "\n"
                        count = 1
                        for name in names:
                            for suffix in ("(mean)", "(10-quantile)", "(90-quantile)"):
                                count += 1
                                header += "#\t" + str(count) + "\t" + name + suffix + "\n"
                        header += "#" * 70 + "\n"
                        datFile.write(header)

                    row = value
                    for name in names:
                        samples = stats[name]
                        ranked = sorted(samples)    # percentile() needs sorted input
                        row += "\t" + str(self.non_exp_repr(numpy.array(samples).mean()))
                        row += "\t" + str(self.non_exp_repr(self.percentile(ranked, 0.1)))
                        row += "\t" + str(self.non_exp_repr(self.percentile(ranked, 0.9)))

                    datFile.write(row + "\n")
def generate::non_exp_repr | ( | self, | ||
x | ||||
) |
Return a floating point representation without exponential notation. Result is a string that satisfies: float(result)==float(x) and 'e' not in result. >>> non_exp_repr(1.234e-025) '0.00000000000000000000000012339999999999999' >>> non_exp_repr(-1.234e+018) '-1234000000000000000.0' >>> for e in xrange(-50,51): ... for m in (1.234, 0.018, -0.89, -75.59, 100/7.0, -909): ... x = m * 10 ** e ... s = non_exp_repr(x) ... assert 'e' not in s ... assert float(x) == float(s)
def non_exp_repr(self, x):
    """Return a floating point representation without exponential notation.

    The result is a string that satisfies
        float(result) == float(x) and 'e' not in result
    for every finite x, for example
        -1.234e+018  ->  '-1234000000000000000.0'
        1e-07        ->  '0.0000001'
    Values whose repr() has no exponent (this includes 'inf' and 'nan')
    are returned as repr() prints them.

    Raises ValueError if repr() yields an exponent form with more or less
    than one digit before the decimal point.

    Adapted from
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/358361
    """
    s = repr(float(x))
    e_loc = s.lower().find('e')
    if e_loc == -1:
        return s

    mantissa = s[:e_loc]
    exp = int(s[e_loc + 1:])

    sign = ''
    if mantissa.startswith('-'):
        sign = '-'
        mantissa = mantissa[1:]

    # repr() prints "d.ddd" but also plain "d" (e.g. 1e+16); the plain
    # form is valid and simply has no digits after the point
    whole, _, fraction = mantissa.partition('.')
    if len(whole) != 1:
        raise ValueError("Unsupported format: " + s)
    digits = whole + fraction

    digitsafter = len(fraction)         # num digits after the decimal point
    if exp >= digitsafter:
        return sign + digits + '0' * (exp - digitsafter) + '.0'
    if exp <= -1:
        return sign + '0.' + '0' * (-exp - 1) + digits
    ip = exp + 1                        # insertion point
    return sign + digits[:ip] + '.' + digits[ip:]
def generate::numeric_compare | ( | self, | ||
x, | ||||
y | ||||
) |
def numeric_compare(self, x, y):
    """Compare two values numerically if both are integers.

    Returns -1, 0 or 1 like a classic comparison function. When both x
    and y can be converted with int() they are compared as integers
    (so "2" sorts before "10"); otherwise x and y are compared as given.

    Adapted from http://wiki.python.org/moin/HowTo/Sorting
    """
    try:
        x_int = int(x)
        y_int = int(y)
    except ValueError:
        # not both integers: use the default ordering of the raw values;
        # (a > b) - (a < b) replaces cmp(), which Python 3 removed
        return (x > y) - (x < y)
    return (x_int > y_int) - (x_int < y_int)
def generate::parse | ( | self | ) |
parse ini file
def parse(self):
    """Parse the ini file and return the list of experiments.

    The file is split into "[Section]" blocks; empty lines and lines
    starting with "#" are skipped. Every section except "Global" becomes
    one Experiment; the lines of "Global", if present, are appended to the
    lines of each experiment.

    The process is terminated with exit status 1 when a section header is
    malformed or when a line appears before the first section.

    NOTE(review): the path is read from the module-level name iniFile,
    not from an attribute of self -- confirm this is intended.
    """
    import sys

    logging.info("parsing ini file")

    # compile regular expressions
    empty = re.compile(r"^\s*$|^\#")        # empty or comment line
    sect = re.compile(r"^\s*\[(\w*)\]")     # new section
    inval = re.compile(r"^\s*\[")           # invalid section

    expLines = {}
    currentSect = ""    # no section seen yet; was unbound before the first header

    # parse file
    with open(iniFile, "r") as iniIn:
        for line in iniIn:
            if empty.search(line):          # skip empty lines
                continue

            sectMatch = sect.search(line)
            if sectMatch:                   # new section starting
                currentSect = sectMatch.group(1)
                logging.info("-- parsing section [" + currentSect + "]")
                expLines.setdefault(currentSect, [])
                continue

            if inval.search(line):          # invalid section
                logging.error("invalid characters found in section " + line)
                sys.exit(1)

            if currentSect == "":           # invalid file format
                logging.error("lines found before a section started in ini file")
                sys.exit(1)

            expLines[currentSect].append(line)

    logging.info("-- generating experiments")

    experiments = []
    for name in expLines:
        if name == "Global":
            continue
        lines = expLines[name][:]
        if "Global" in expLines:            # add Global lines to each experiment
            lines.extend(expLines["Global"])

        experiments.append(Experiment(lines, name))

    logging.info("Done")
    return experiments
def generate::percentile | ( | self, | ||
N, | ||||
percent, | ||||
key = lambda x:x | ||||
) |
Find the percentile of a list of values. Source: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/511478 @parameter N - is a list of values. Note N MUST BE already sorted. @parameter percent - a float value from 0.0 to 1.0. @parameter key - optional key function to compute value from each element of N. @return - the percentile of the values
def percentile(self, N, percent, key=lambda x:x):
    """Find the percentile of a list of values.

    Source: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/511478

    @parameter N - is a list of values. Note N MUST BE already sorted.
    @parameter percent - a float value from 0.0 to 1.0.
    @parameter key - optional key function to compute value from each element of N.

    @return - the percentile of the values, linearly interpolated between
              the two neighbouring elements; None if N is empty
    """
    if not N:
        return None
    k = (len(N) - 1) * percent      # fractional position inside N
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:                      # k hits an element exactly
        return key(N[int(k)])
    # each neighbour is weighted by the distance of k to the OTHER
    # neighbour, so the closer element contributes more
    d0 = key(N[int(f)]) * (c - k)
    d1 = key(N[int(c)]) * (k - f)
    return d0 + d1
def generate::printScaStats | ( | self, | ||
fileName, | ||||
stats | ||||
) |
print scalar data
def printScaStats(self, fileName, stats):
    """Print scalar data.

    fileName -- output path without extension; ".sca" is appended
    stats    -- dict mapping a scalar name to the list of its values

    Writes one "scalar <name>\\t<mean>" line per scalar, sorted by name;
    the mean is formatted with self.non_exp_repr().
    """
    import numpy    # the array aliases in the scipy namespace are deprecated

    # "with" closes the file even if formatting a value fails
    with open(fileName + ".sca", "w") as scaOut:
        for name in sorted(stats):
            mean = numpy.array(stats[name]).mean()
            scaOut.write("scalar " + name + "\t" + str(self.non_exp_repr(mean)) + "\n")
def generate::printVecStats | ( | self, | ||
fileName, | ||||
baskets | ||||
) |
print vector data
def printVecStats(self, fileName, baskets):
    """Print vector data.

    fileName -- output path without extension; ".vec.dat" is appended
    baskets  -- dict mapping a vector name to a dict that maps a bucket
                index to the list of values in that bucket

    The file starts with a commented column description. Then one row
    follows per bucket from 0 up to the largest index in steps of
    self.vecInterval: the index, then mean, 10-quantile and 90-quantile
    for every vector (sorted by name). A vector without data in a bucket
    contributes "0 0 0".
    """
    import numpy    # the array aliases in the scipy namespace are deprecated

    vectors = sorted(baskets)
    maxIndex = 0

    # "with" closes the file even if formatting a value fails
    with open(fileName + ".vec.dat", "w") as datFile:
        # header
        header = "#\tColumn\tVector\n"
        header += "#\t1\tsimulation time\n"
        column = 1
        for vector in vectors:
            for suffix in ("(mean)", "(10-quantile)", "(90-quantile)"):
                column += 1
                header += "#\t" + str(column) + "\t" + vector + suffix + "\n"
            largest = max(baskets[vector].keys())
            if maxIndex < largest:
                maxIndex = int(largest)

        header += "#" * 70 + "\n"
        datFile.write(header)

        # data
        for index in range(0, maxIndex + self.vecInterval, self.vecInterval):   # loop through buckets
            row = str(index)
            for vector in vectors:                                              # loop through vectors
                if index in baskets[vector]:
                    samples = baskets[vector][index]
                    ranked = sorted(samples)    # percentile() needs sorted input
                    row += "\t" + str(self.non_exp_repr(numpy.array(samples).mean()))
                    row += "\t" + str(self.non_exp_repr(self.percentile(ranked, 0.1)))
                    row += "\t" + str(self.non_exp_repr(self.percentile(ranked, 0.9)))
                else:
                    row += "\t0\t0\t0"

            datFile.write(row + "\n")
def generate::processSca | ( | self, | ||
fileName | ||||
) |
parse sca file
def processSca(self, fileName):
    """Parse one sca file and collect its scalar values.

    fileName -- path of the file without extension; ".sca" is appended

    Every scalar value is appended to self.currScaStat[name] and
    self.globalScaStat[name], where name is the module string and the
    scalar string joined by a tab. Indices in square brackets are removed
    from the line first, so "node[3]" and "node[5]" share one entry.
    A missing file is reported with a warning and otherwise ignored;
    lines that do not look like a scalar record are skipped with a
    warning.
    """
    fileName = fileName + ".sca"

    if not os.access(fileName, os.F_OK):
        logging.warning("Unable to parse file " + fileName + ": file not found")
        return

    # "with" closes the file even if a value cannot be converted
    with open(fileName, "r") as scaIn:
        for line in scaIn:
            if self.reScaRun.search(line):      # run header or blank line
                continue

            line = self.reRemDigits.sub("[]", line)
            matches = self.reScaLine.search(line)
            if matches is None:
                # neither header nor scalar record: skip instead of
                # failing on a None match object
                logging.warning("Skipping unrecognized line in " + fileName + ": " + line.rstrip())
                continue

            name = matches.group(1) + "\t" + matches.group(2)
            value = float(matches.group(3))

            self.currScaStat.setdefault(name, []).append(value)
            self.globalScaStat.setdefault(name, []).append(value)
def generate::processVec | ( | self, | ||
fileName | ||||
) |
parse vec file
def processVec(self, fileName):
    """Parse one vec file and collect bucketed means of its vectors.

    fileName -- path of the file without extension; ".vec" is appended

    Lines starting with "vector" declare a vector id and its name
    (indices in square brackets are removed from the name). Data lines
    "<id>\\t<time>\\t<value>" are sorted into buckets of width
    self.vecInterval by their time. For every vector and bucket the mean
    of this file's values is appended to self.globalVecBaskets and
    self.currVecBaskets. A missing file is reported with a warning and
    otherwise ignored.
    """
    import numpy    # the array aliases in the scipy namespace are deprecated

    fileName = fileName + ".vec"

    if not os.access(fileName, os.F_OK):
        logging.warning("Unable to parse file " + fileName + ": file not found")
        return

    vectorNames = {}    # vector id -> vector name
    baskets = {}        # vector name -> {bucket index -> values of this file}

    # parse file; "with" closes it even if a line cannot be processed
    with open(fileName, "r") as vecIn:
        for line in vecIn:
            if self.reVec.search(line):             # new vector starting
                line = self.reRemDigits.sub("[]", line)
                matches = self.reVecName.search(line)
                vectorNames[matches.group(1)] = matches.group(2) + " " + matches.group(3)
                continue

            matches = self.reVecLine.search(line)
            if matches:                             # data line
                # lower bound of the bucket the time stamp falls into
                index = math.floor(float(matches.group(2)) / self.vecInterval) * self.vecInterval
                vector = vectorNames[matches.group(1)]
                baskets.setdefault(vector, {}).setdefault(index, []).append(float(matches.group(3)))

    # summarize data
    for vector in baskets:
        globalBuckets = self.globalVecBaskets.setdefault(vector, {})
        currBuckets = self.currVecBaskets.setdefault(vector, {})

        for index in baskets[vector]:
            mean = numpy.array(baskets[vector][index]).mean()
            globalBuckets.setdefault(index, []).append(mean)
            currBuckets.setdefault(index, []).append(mean)
def generate::remove | ( | self, | ||
file | ||||
) |
tuple generate::experiments = IniParser(iniFile) |
generate::iniFile = options.iniFile |
Referenced by BaseApp::finish(), BaseApp::getThisCompType(), and XmlRpc::XmlRpcValue::structFromXml().
tuple generate::parser = optparse.OptionParser() |
Start of program.
Parse Command Line
tuple generate::reComb = re.compile(r"\$\$") |
tuple generate::reLine = re.compile(r"(\S*)\s*=\s*(.*?)[\r\n]") |
tuple generate::reName = re.compile(r"^\s*\**\.*([\w\.]*)") |
tuple generate::reRemDigits = re.compile(r"\[[0-9]*\]") |
tuple generate::reRuns = re.compile(r"^\s*runs\s*=\s*(\d+)") |
tuple generate::reScaLine = re.compile(r'scalar\s*("[^"]*")\s*("[^"]*")\s*(-?[\d.]*e?-?\d*)') |
tuple generate::reScaRun = re.compile(r"^run|^\s*$") |
tuple generate::reSemi = re.compile(r";") |
class to store data for each experiment
tuple generate::reValue = re.compile(r"^\s*\"?([\w\.]*)\"?") |
tuple generate::reVec = re.compile(r"^vector") |
tuple generate::reVecLine = re.compile(r"^([0-9]*)\t([0-9.]*)\t([0-9.]*)\n$") |
tuple generate::reVecName = re.compile(r'(\d*) "(.*)" (".*") (\d*)\n$') |