generate Namespace Reference


Classes

class  Experiment
class  IniParser
class  BaseWorker
class  RemoveOversimFiles
class  GenerateIniFiles
class  CollectStatistics

Functions

def __init__
def generate
def __init__
def parse
def remove
def delete
def generate
def delete
def processSca
def processVec
def printScaStats
def printVecStats
def percentile
def numeric_compare
def IsInt
def non_exp_repr
def makeScaDat

Variables

tuple reSemi = re.compile(r";")
tuple reLine = re.compile(r"(\S*)\s*=\s*(.*?)[\r\n]")
tuple reName = re.compile(r"^\s*\**\.*([\w\.]*)")
tuple reValue = re.compile(r"^\s*\"?([\w\.]*)\"?")
tuple reComb = re.compile(r"\$\$")
tuple reRuns = re.compile(r"^\s*runs\s*=\s*(\d+)")
tuple reVec = re.compile(r"^vector")
tuple reVecName = re.compile(r'(\d*) "(.*)" (".*") (\d*)\n$')
tuple reVecLine = re.compile(r"^([0-9]*)\t([0-9.]*)\t([0-9.]*)\n$")
tuple reRemDigits = re.compile(r"\[[0-9]*\]")
tuple reScaRun = re.compile(r"^run|^\s*$")
tuple reScaLine = re.compile(r'scalar\s*("[^"]*")\s*("[^"]*")\s*(-?[\d.]*e?-?\d*)')
tuple parser = optparse.OptionParser()
 Start of program.
 iniFile = options.iniFile
tuple experiments = IniParser(iniFile)
 name
 fileNames
 lines
 parameterNames
 parameterValues
 runs
 currentLines
 allLines
 globalVecBaskets
 globalScaStat
 currVecBaskets
 currScaStat


Function Documentation

def generate::__init__ (   self,
  iniFile 
)

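Stores the path of the ini file to be parsed. (The Doxygen brief "parses the ini file and generates experiments" appears to describe the enclosing parser class rather than this constructor, which only records iniFile.)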
00087                                :
00088         self.iniFile = iniFile
00089         
    def parse(self):

def generate::__init__ (   self,
  expLines,
  expName 
)

00019                                          :
00020         # initialization
00021         self.name = expName # name of the experiment
00022         self.fileNames = [] # list of file names
00023         self.lines = [] # list of lists of lines for ini files
00024         self.parameterNames = [] # list of parameter names
00025         self.parameterValues =  [] # list of lists of parameter values
00026         self.runs = 0 # number of runs for the experiment
00027         
00028         self.currentLines = expLines[:]
00029         self.allLines = expLines
00030         
00031         # parse the lines
00032         self.generate (0, expName)
00033         
00034         if self.runs == 0:
00035             self.runs = self.defaultRuns
00036         
00037         # cleanup
00038         del self.currentLines
00039         del self.allLines        
00040         
    def generate(self, index, fileName):

def generate::delete (   self,
  experiments 
)

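Delete the generated statistics files of every experiment: the .sca and .vec.dat files per experiment and per parameter combination, and the .sca.dat files per parameter.

Note: Doxygen also lists the brief "delete ini files" for this entry; it does not match the listing below and presumably belongs to a different, same-named delete method.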
00254                                  :
00255         """ delete generated files """
00256         logging.info("deleting dat files")
00257         
00258         for exp in experiments:
00259             logging.info("-- " + exp.name)
00260             self.remove(exp.name + ".sca")
00261             self.remove(exp.name + ".vec.dat")
00262             for fileName in exp.fileNames:
00263                 logging.info("---- " + fileName)
00264                 self.remove(fileName + ".sca")
00265                 self.remove(fileName + ".vec.dat")
00266                 
00267             for index, pName in enumerate(exp.parameterNames):
00268                 values = exp.parameterValues[index]
00269                 for value in values: # loop through values
00270                     param = pName + "=" + value # parameter-value string in fileName
00271                     for fileName in exp.fileNames: # search files that contain param
00272                         if not fileName.find(param) < 0:
00273                             stripped = fileName.replace(param, "")
00274                             logging.info("---- " + stripped)
00275                             self.remove(stripped + ".sca.dat")
00276                             
00277         logging.info("done")
00278         
00279         
    def processSca(self, fileName):

def generate::delete (  ) 

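Delete the original .sca, .vec and .debug files of every run of every experiment. (Doxygen labels this entry "delete ini files"; that brief does not match the listing below and presumably belongs to a different, same-named delete method.)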
00151                                  :
00152         logging.info("Deleting original sca and vec files")
00153         
00154         for exp in experiments: # loop through experiments
00155             logging.info("-- " + exp.name)
00156             
00157             for index, fileName in enumerate(exp.fileNames): # loop through parameter combinations
00158                 
00159                 for run in range(1, exp.runs+1): # loop through number of runs
00160                     baseFileName = fileName + "-run=" + str(run)
00161                     self.remove(baseFileName + ".sca")
00162                     self.remove(baseFileName + ".vec")
00163                     self.remove(baseFileName + ".debug")
00164                     
00165         logging.info("Done")
00166         
00167         
class GenerateIniFiles(BaseWorker): # generates ini files

def generate::generate (   self,
  experiments 
)

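Generate ini files: for every experiment, parameter combination and run, write one .ini file containing the run header, description, output file names and the experiment's lines.

Note: Doxygen also lists the brief "collect statistics from sca and vec files" for this entry; it does not match the listing below and presumably belongs to a different, same-named generate method.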
00170                                    :
00171         """ generate ini files """
00172         logging.info("Generate ini files")
00173         
00174         for exp in experiments: # loop through experiments
00175             logging.info("-- " + exp.name)
00176             
00177             for index, fileName in enumerate(exp.fileNames): # loop through parameter combinations
00178 
00179                 for run in range(1, exp.runs+1): # loop through number of runs
00180                     baseFileName = fileName + "-run=" + str(run)
00181                     logging.info("---- " + baseFileName)
00182                     
00183                     # write ini file
00184                     file = open(baseFileName + ".ini", "w")
00185                     
00186                     file.write("[Run " + str(run) + "]\n")
00187                     file.write("description = " + baseFileName + "\n")
00188                     file.write("output-scalar-file = " + baseFileName + ".sca\n")
00189                     file.write("output-vector-file = " + baseFileName + ".vec\n")
00190                     for line in exp.lines[index]:
00191                         file.write(line)
00192                         
00193                     file.close()
00194                     
00195         logging.info("Done")
00196         
    def delete(self, experiments):

def generate::generate (   self,
  index,
  fileName 
)

parse the lines and generate parameter combinations 
00041                                        :
00042         """ parse the lines and generate parameter combinations """
00043         
00044         if index >= len(self.allLines): # all lines processed
00045             self.fileNames.append(fileName)
00046             self.lines.append(self.currentLines[:])
00047             return
00048         
00049         line = self.allLines[index]
00050         
00051         if self.reSemi.search(line): # semicolon found, parse parameters
00052             matches = self.reLine.search(line)
00053             name = matches.group(1)
00054             pName = self.reName.search(name).group(1) # pretty name for output
00055             values = self.reSemi.split(matches.group(2))
00056             
00057             if not pName in self.parameterNames: # add parameter to global parameter list
00058                 self.parameterNames.append(pName)
00059                 pValues = []
00060                 for value in values:
00061                     pValues.append(self.reValue.search(value).group(1))
00062                 self.parameterValues.append(pValues)
00063                 
00064             for value in values:
00065                 self.currentLines[index] = name + "=" + value + "\n"
00066                 pValue = self.reValue.search(value).group(1) # pretty value for output
00067                 self.generate (index + 1, fileName + "-" + pName + "=" + pValue)
00068                 
00069         elif self.reRuns.search(line): # line contains number of runs
00070             if not self.runs: # check if number of runs already set
00071                 self.runs = int(self.reRuns.search(line).group(1))
00072             self.generate(index + 1, fileName)                
00073             
00074         elif self.reComb.search(line):
00075             lastValue = self.reLine.search(self.currentLines[index-1]).group(2)
00076             thisName = self.reLine.search(line).group(1)
00077             self.currentLines[index] = thisName + "=" + lastValue + "\n"
00078             self.generate(index + 1, fileName)
00079             
00080         else:
00081             # nothing to do, go into next recursion step
00082             self.generate(index + 1, fileName)
00083     
class IniParser: # parses the ini file and creates experiments

def generate::IsInt (   self,
  str 
)

Is the given string an integer? Returns 1 if int() accepts the string, otherwise 0 (when int() raises ValueError).
00461                         :
00462         # from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52660
00463         """ Is the given string an integer?     """
00464         ok = 1
00465         try:
00466             num = int(str)
00467         except ValueError:
00468             ok = 0
00469         return ok
00470         
    def non_exp_repr(self, x):

def generate::makeScaDat (   self,
  exp 
)

create sca dat files 
00513                              :
00514         """ create sca dat files """
00515         import scipy, scipy.stats # import scipy packages
00516         
00517         for index, pName in enumerate(exp.parameterNames): # loop through parameters
00518             values = exp.parameterValues[index]
00519             
00520             dict = {}
00521             entryCount = 0
00522             
00523             for value in values: # loop through values
00524                 param = pName + "=" + value # parameter-value string in fileName
00525                 
00526                 for fileName in exp.fileNames: # search files that contain param
00527                     if not fileName.find(param) < 0:
00528                         
00529                         # process sca files
00530                         self.currScaStat = {}
00531                         self.globalScaStat = {}
00532                         for run in range(1, exp.runs+1): # loop through number of runs
00533                             baseFileName = fileName + "-run=" + str(run)
00534                             self.processSca(baseFileName)
00535                         
00536                         # check if sca files have different number of scalars (should not happen!)
00537                         if entryCount == 0:
00538                             entryCount = len(self.currScaStat)
00539                         elif not entryCount == len(self.currScaStat) and entryCount >= 0:
00540                             logging.error("Your sca files don't contain the same number of entries. Maybe you are comparing different Protocols oder using different applications?")
00541                             entryCount = -1;
00542                         
00543                         # save collected data
00544                         stripped = fileName.replace(param, "")
00545                         if not stripped in dict:
00546                             dict[stripped] = {}
00547                             
00548                         dict[stripped][value] = self.currScaStat.copy()
00549                         
00550             # write sca.dat files
00551             for stripped in dict:
00552                 file = open (stripped + ".sca.dat", "w")
00553                 first = 1
00554                 
00555                 for value in sorted(dict[stripped].keys(), self.numeric_compare):
00556                     stats = dict[stripped][value]
00557                     line = ""
00558                     
00559                     if first: # write header
00560                         first = 0
00561                         line += "#\tcolumn\tscalar name\n"
00562                         line += "#\t1\t" + pName + "\n"
00563                         count = 1
00564                         for name in sorted(stats.keys()):
00565                             count += 1
00566                             line += "#\t" + str(count) + "\t" + name + "(mean)\n"
00567                             count += 1
00568                             line += "#\t" + str(count) + "\t" + name + "(10-quantile)\n"
00569                             count += 1
00570                             line += "#\t" + str(count) + "\t" + name + "(90-quantile)\n"
00571                             
00572                         line += "#" * 70 + "\n"
00573                         file.write(line)
00574                         line = ""
00575                         
00576                     line += value
00577                     
00578                     for name in sorted(stats.keys()):
00579                         list = stats[name]
00580                         arr = scipy.array(list)
00581                         line += "\t" + str(self.non_exp_repr(arr.mean()))
00582                         line += "\t" + str(self.non_exp_repr(self.percentile(sorted(list), 0.1)))
00583                         line += "\t" + str(self.non_exp_repr(self.percentile(sorted(list), 0.9)))
00584                         
00585                     file.write(line + "\n")
00586                     
00587                 file.close()
00588                 
00589 
00590 
00591 
00592 
################################################################

def generate::non_exp_repr (   self,
  x 
)

Return a floating point representation without exponential notation.

Result is a string that satisfies:
    float(result)==float(x) and 'e' not in result.

>>> non_exp_repr(1.234e-025)
'0.00000000000000000000000012339999999999999'
>>> non_exp_repr(-1.234e+018)
'-1234000000000000000.0'

>>> for e in xrange(-50,51):
...     for m in (1.234, 0.018, -0.89, -75.59, 100/7.0, -909):
...         x = m * 10 ** e
...         s = non_exp_repr(x)
...         assert 'e' not in s
...         assert float(x) == float(s)

00471                              :
00472         # from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/358361
00473         """Return a floating point representation without exponential notation.
00474 
00475         Result is a string that satisfies:
00476             float(result)==float(x) and 'e' not in result.
00477         
00478         >>> non_exp_repr(1.234e-025)
00479         '0.00000000000000000000000012339999999999999'
00480         >>> non_exp_repr(-1.234e+018)
00481         '-1234000000000000000.0'
00482         
00483         >>> for e in xrange(-50,51):
00484         ...     for m in (1.234, 0.018, -0.89, -75.59, 100/7.0, -909):
00485         ...         x = m * 10 ** e
00486         ...         s = non_exp_repr(x)
00487         ...         assert 'e' not in s
00488         ...         assert float(x) == float(s)
00489 
00490         """
00491         s = repr(float(x))
00492         e_loc = s.lower().find('e')
00493         if e_loc == -1:
00494             return s
00495 
00496         mantissa = s[:e_loc].replace('.', '')
00497         exp = int(s[e_loc+1:])
00498 
00499         assert s[1] == '.' or s[0] == '-' and s[2] == '.', "Unsupported format"     
00500         sign = ''
00501         if mantissa[0] == '-':
00502             sign = '-'
00503             mantissa = mantissa[1:]
00504 
00505         digitsafter = len(mantissa) - 1     # num digits after the decimal point
00506         if exp >= digitsafter:
00507             return sign + mantissa + '0' * (exp - digitsafter) + '.0'
00508         elif exp <= -1:
00509             return sign + '0.' + '0' * (-exp - 1) + mantissa
00510         ip = exp + 1                        # insertion point
00511         return sign + mantissa[:ip] + '.' + mantissa[ip:]
00512         

def generate::numeric_compare (   self,
  x,
  y 
)

00446                                    :
00447         # from http://wiki.python.org/moin/HowTo/Sorting
00448         if not self.IsInt(x) or not self.IsInt(y):
00449           return cmp(x, y) #use default compare function
00450           
00451         x = int(x)
00452         y = int(y)
00453         
00454         if x > y:
00455             return 1
00456         elif x == y:
00457             return 0
00458         else: # x < y
00459             return -1
00460     
    def IsInt(self, str):

def generate::parse (   self  ) 

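Parse the ini file and return a list of Experiment objects, one per section other than "Global"; the lines of the "Global" section, if present, are appended to every experiment. Empty lines and lines starting with "#" are skipped, and a malformed section header is logged as an error before quitting.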
00090                    :
00091         """parse ini file"""
00092         
00093         logging.info("parsing ini file")
00094         
00095         # compile regular expressions
00096         empty = re.compile(r"^\s*$|^\#") # empty or comment line
00097         sect = re.compile(r"^\s*\[(\w*)\]") # new section
00098         inval = re.compile(r"^\s*\[") # invalid section
00099         
00100         expLines = {}
00101         
00102         # parse file
00103         file = open (iniFile, "r")
00104         
00105         for line in file:
00106             if empty.search(line): # skip empty lines
00107                 continue 
00108             
00109             if sect.search(line): # new section starting
00110                 currentSect = sect.search(line).group(1)
00111                 logging.info("-- parsing section [" + currentSect + "]")
00112                 if not currentSect in expLines:
00113                     expLines[currentSect] = []
00114                 continue
00115                 
00116             elif inval.search(line): # invalid section
00117                 logging.error("invalid characters found in section " + line)
00118                 quit()
00119             
00120             if currentSect == "": # invalid file format
00121                 logging.error("lines found before a section started in ini file")
00122                 quit()
00123             
00124             expLines[currentSect].append(line)
00125             
00126         file.close()
00127         
00128         logging.info("-- generating experiments")
00129         
00130         experiments = []
00131         for name in expLines.keys():
00132             if name == "Global":
00133                 continue
00134             lines = expLines[name][:]
00135             if "Global" in expLines: # add Global lines to each experiment
00136                 lines.extend(expLines["Global"])
00137                 
00138             experiments.append(Experiment(lines, name))
00139         
00140         logging.info("Done")
00141         return experiments
00142         
00143         
class BaseWorker: # parent class for "worker" classes (for common methods)

def generate::percentile (   self,
  N,
  percent,
  key = lambda x:x 
)

Find the percentile of a list of values.
Source: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/511478

@parameter N - is a list of values. Note N MUST BE already sorted.
@parameter percent - a float value from 0.0 to 1.0.
@parameter key - optional key function to compute value from each element of N.

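@return - the percentile of the values, or None if N is empty

NOTE(review): in the listing below the lower neighbour N[floor(k)] is weighted by (k-f) and the upper neighbour N[ceil(k)] by (c-k). Standard linear interpolation uses the opposite weights (lower by c-k, upper by k-f), so interpolated results look mirrored between the two neighbours, e.g. [0, 10] at percent 0.1 yields 9 instead of 1. Confirm against the cited recipe.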
00424                                                  :x):
00425         """
00426         Find the percentile of a list of values.
00427         Source: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/511478
00428 
00429         @parameter N - is a list of values. Note N MUST BE already sorted.
00430         @parameter percent - a float value from 0.0 to 1.0.
00431         @parameter key - optional key function to compute value from each element of N.
00432 
00433         @return - the percentile of the values
00434         """
00435         if not N:
00436             return None
00437         k = (len(N)-1) * percent
00438         f = math.floor(k)
00439         c = math.ceil(k)
00440         if f == c:
00441             return key(N[int(k)])
00442         d0 = key(N[int(f)]) * (k-f)
00443         d1 = key(N[int(c)]) * (c-k)
00444         return d0+d1
00445         

def generate::printScaStats (   self,
  fileName,
  stats 
)

print scalar data 
00369                                             :
00370         """ print scalar data """
00371         import scipy, scipy.stats # import scipy packages
00372         
00373         file = open (fileName + ".sca", "w")
00374         
00375         for name in sorted(stats.keys()):
00376             arr = scipy.array(stats[name])
00377             value = str(self.non_exp_repr(arr.mean()))
00378             file.write("scalar " + name + "\t" + value + "\n")
00379             
00380         file.close()
00381         
    def printVecStats(self, fileName, baskets):

def generate::printVecStats (   self,
  fileName,
  baskets 
)

print vector data 
00382                                               :
00383         """ print vector data """
00384         import scipy, scipy.stats # import scipy packages
00385         
00386         vecCounter = 1
00387         maxIndex = 0
00388         
00389         file = open (fileName + ".vec.dat", "w")
00390         
00391         # header
00392         line = "#\tColumn\tVector\n"
00393         line += "#\t1\tsimulation time\n"
00394         for vector in sorted(baskets.keys()):
00395             vecCounter += 1
00396             line += "#\t" + str(vecCounter) + "\t" + vector + "(mean)\n"
00397             vecCounter += 1
00398             line += "#\t" + str(vecCounter) + "\t" + vector + "(10-quantile)\n"
00399             vecCounter += 1
00400             line += "#\t" + str(vecCounter) + "\t" + vector + "(90-quantile)\n"
00401             if maxIndex < max(baskets[vector].keys()):
00402                 maxIndex = int(max(baskets[vector].keys()))
00403                 
00404         line += "#" * 70 + "\n"
00405         file.write(line)
00406         
00407         # data
00408         for index in range(0, maxIndex+self.vecInterval, self.vecInterval): # loop through buckets
00409             line = str(index)
00410             for vector in sorted(baskets.keys()): # loop through vectors
00411                 if index in baskets[vector]:
00412                     list = baskets[vector][index]
00413                     arr = scipy.array(list)
00414                     line += "\t" + str(self.non_exp_repr(arr.mean()))
00415                     line += "\t" + str(self.non_exp_repr(self.percentile(sorted(list), 0.1)))
00416                     line += "\t" + str(self.non_exp_repr(self.percentile(sorted(list), 0.9)))
00417                 else:
00418                     line += "\t0\t0\t0"
00419                     
00420             file.write(line + "\n")
00421             
00422         file.close()
00423         
    def percentile(self, N, percent, key=lambda x:x):

def generate::processSca (   self,
  fileName 
)

parse sca file 
00280                                   :
00281         """ parse sca file """
00282         import scipy, scipy.stats # import scipy packages
00283         
00284         fileName = fileName + ".sca"
00285         
00286         if not os.access(fileName, os.F_OK):
00287             logging.warn("Unable to parse file " + fileName + ": file not found")
00288             return
00289             
00290         file = open(fileName, "r")
00291         
00292         for line in file:
00293             if self.reScaRun.search(line):
00294                 continue
00295                 
00296             line = self.reRemDigits.sub("[]", line)
00297             matches = self.reScaLine.search(line)
00298             name = matches.group(1) + "\t" + matches.group(2)
00299             value = float(matches.group(3))
00300             
00301             if not name in self.currScaStat:
00302                 self.currScaStat[name] = []
00303             if not name in self.globalScaStat:
00304                 self.globalScaStat[name] = []
00305                 
00306             self.currScaStat[name].append(value)
00307             self.globalScaStat[name].append(value)
00308             
00309         file.close()
00310         
    def processVec(self, fileName):

def generate::processVec (   self,
  fileName 
)

parse vec file 
00311                                   :
00312         """ parse vec file """
00313         import scipy, scipy.stats # import scipy packages
00314         
00315         fileName = fileName + ".vec"
00316         
00317         vectorNames = {}
00318         baskets = {}
00319         
00320         if not os.access(fileName, os.F_OK):
00321             logging.warn("Unable to parse file " + fileName + ": file not found")
00322             return
00323         
00324         # parse file
00325         file = open(fileName, "r")
00326         
00327         for line in file:
00328             if self.reVec.search(line): # new vector starting
00329                 line = self.reRemDigits.sub("[]", line)
00330                 matches = self.reVecName.search(line)
00331                 vectorNames[matches.group(1)] = matches.group(2) + " " + matches.group(3)
00332                 
00333             elif self.reVecLine.search(line): # data line
00334                 matches = self.reVecLine.search(line)
00335                 index = math.floor(float(matches.group(2)) / self.vecInterval) * self.vecInterval
00336                 
00337                 # initialize lists if needed
00338                 if not vectorNames[matches.group(1)] in baskets:
00339                     baskets[vectorNames[matches.group(1)]] = {}
00340                 if not index in baskets[vectorNames[matches.group(1)]]:
00341                     baskets[vectorNames[matches.group(1)]][index] = []
00342                     
00343                 # add value to basket
00344                 baskets[vectorNames[matches.group(1)]][index].append(float(matches.group(3)))
00345             
00346         file.close()
00347         
00348         # summarize data
00349         for vector in baskets:
00350             # initialize global baskets if needed
00351             if not vector in self.globalVecBaskets:
00352                 self.globalVecBaskets[vector] = {}
00353             if not vector in self.currVecBaskets:
00354                 self.currVecBaskets[vector] = {}
00355                 
00356             for index in baskets[vector]:
00357                 # initialize global baskets if needed
00358                 if not index in self.globalVecBaskets[vector]:
00359                     self.globalVecBaskets[vector][index] = []
00360                 if not index in self.currVecBaskets[vector]:
00361                     self.currVecBaskets[vector][index] = []
00362                     
00363                 arr = scipy.array(baskets[vector][index])
00364                 mean = arr.mean()
00365                 self.globalVecBaskets[vector][index].append(mean)
00366                 self.currVecBaskets[vector][index].append(mean)
00367                 
00368         
    def printScaStats(self, fileName, stats):

def generate::remove (   self,
  file 
)

00145                           : # delete file if it exists
00146         if os.access(file, os.F_OK):
00147             os.remove(file)
00148 


Variable Documentation

generate::iniFile = options.iniFile

tuple generate::parser = optparse.OptionParser()

Start of program.

Parse Command Line

tuple generate::reComb = re.compile(r"\$\$")

tuple generate::reLine = re.compile(r"(\S*)\s*=\s*(.*?)[\r\n]")

tuple generate::reName = re.compile(r"^\s*\**\.*([\w\.]*)")

tuple generate::reRemDigits = re.compile(r"\[[0-9]*\]")

tuple generate::reRuns = re.compile(r"^\s*runs\s*=\s*(\d+)")

tuple generate::reScaLine = re.compile(r'scalar\s*("[^"]*")\s*("[^"]*")\s*(-?[\d.]*e?-?\d*)')

tuple generate::reScaRun = re.compile(r"^run|^\s*$")

tuple generate::reSemi = re.compile(r";")

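Regular expression matching a semicolon; it is used to detect lines with several parameter values and to split those values. (Doxygen attaches the comment "class to store data for each experiment" to this variable; it presumably describes the Experiment class instead.)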

tuple generate::reValue = re.compile(r"^\s*\"?([\w\.]*)\"?")

tuple generate::reVec = re.compile(r"^vector")

tuple generate::reVecLine = re.compile(r"^([0-9]*)\t([0-9.]*)\t([0-9.]*)\n$")

tuple generate::reVecName = re.compile(r'(\d*) "(.*)" (".*") (\d*)\n$')


Generated on Fri Sep 19 13:05:08 2008 for ITM OverSim by  doxygen 1.5.5