Ignore:
Timestamp:
May 8, 2012, 11:47:39 AM (7 years ago)
Author:
ksherdy
Message:

Boo.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/test/st_test_file_generator.py

    r2070 r2075  
    33import math
    44       
    5 # This script generates 3 files.
    6 #
    7 # 1. Output Test File
    8 #
    9 #    L_O_U_L_O_U_L_O_U_Ag_Mg.test, where for each (L_O_U),
     5# (i) Test Files
     6#
     7#   [L_O_U]+_Ag_Mg.test, where for each triple (L,O,U)
    108
    11 #    * UNIFORM denotes that equal counts of symbols of each length L are generated
    12 #    * L denotes length
    13 #    * O denotes the total number of symbols of length L
    14 #    * U denotes the number of unique symbols of length L
     9#    L - length
     10#    O - total occurrences of length L symbols
     11#    U - unique occurrences of length L symbols
    1512#
    1613#    and
     
    1916#    * Mg denotes the maximum absolute deviation gap distance, see Absolute Deviation
    2017#
    21 # 2. Meta Results File
    22 #
    23 #    L_O_U_L_O_U_L_O_U_Ag_Mg.meta
     18# (ii) Test Meta Files
     19#
     20#    [L_O_U]+_Ag_Mg.meta
    2421#
    25 #    This file contains start position, length and gap distance for each symbol contained in the generated test file, as
    26 #    well as the average gap distance and the maximum gap distance.
    27 #       
     22#    Contains test file meta data, i.e. start position,
     23#    length and gap distance for each symbol contained
     24#    in the generated test file.
    2825
    2926usage = "python testFileGenerator.py [L,O,U] [L,O,U] [L,O,U] ... AvgGap MaxGap"
    3027
    31 # parallel lists unique_symbols and unique_symbol_counts
    32 # list of unique symbols
     28# parallel lists
    3329unique_symbols = []
    34 # list of counts of the number of occurrences for each unique symbol
    3530unique_symbol_counts = []
    3631
    37 # a list that contains symbol specification [[L,O,U], [L,O,U], [L,O,U], ...]
     32# symbol specs [[L,O,U], [L,O,U], [L,O,U], ...]
    3833symbol_specs = []
    3934
    4035unique_sym_pos = []
    4136
    42 class SymbolFileGenerator:
    43     testFileHandle = 0   
     37################################################################################
     38# Symbol Characters
     39################################################################################
     40def genLowerCaseCharactersList():
     41    return ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
     42
     43def genUpperCaseCharactersList():
     44    return ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
     45
     46def genDigitCharactersList():
     47    return ['0','1','2','3','4','5','6','7','8','9']
     48
     49def genSpecialCharactersList():
     50    return ['_',':','-','.']
     51
     52delim = ','
     53char_pool = genDigitCharactersList() + genLowerCaseCharactersList() + genUpperCaseCharactersList() + genSpecialCharactersList() # XML-like characters
     54
     55class TestFileGenerator:
     56    test_file = None
    4457    def __init__(self,base_filename):   
    45         self.testFileHandle = open(base_filename+".test", 'w')
    46        
    47     def appendUniqueSymbolAndGap(self, symbol_string, gap):   
    48         self.testFileHandle.write(symbol_string + ','*gap)       
     58        self.test_file = open(base_filename+".test", 'w')
     59       
     60    def appendUniqueSymbolAndGap(self, symbol_string, gap):
     61        global delim
     62        delims = delim*gap
     63        self.test_file.write(symbol_string + delims)
    4964       
    5065    def close(self):
    51         self.testFileHandle.close()
     66        self.test_file.close()
    5267     
    5368
    5469class SymbolLogger:
    55     logFileHandle = 0
     70    meta_file = 0
    5671
    5772    def __init__(self,base_filename,unique_symbol_total,avg_gap, max_gap):
     
    6176            unique_sym_pos.append([])
    6277
    63         self.logFileHandle = open (base_filename+".meta", 'w')
    64         self.logFileHandle.write("Average Gap Distance: " + str(avg_gap) + "\nMaximum Gap Distance: " + str(max_gap))
     78        self.meta_file = open (base_filename+".meta", 'w')
     79        self.meta_file.write("Average Gap Distance: " + str(avg_gap) + "\nMaximum Gap Distance: " + str(max_gap))
    6580
    6681    def logUniqueSymbolPositions(self, index):
    67         self.logFileHandle.write("Symbol \'" + unique_symbols[index] + "\' appeared at positions: \n")
    68         self.logFileHandle.write(str(unique_sym_pos[index]) + "\n")
     82        self.meta_file.write("Symbol \'" + unique_symbols[index] + "\' appeared at positions: \n")
     83        self.meta_file.write(str(unique_sym_pos[index]) + "\n")
    6984
    7085    def actualUniqueSymbolInfo(self,length,position,gap):
    71         self.logFileHandle.write("Start: " + repr(position).rjust(10) + " | Length: " + repr(length).rjust(10) + " | Gap: " + repr(gap).rjust(10) + "\n")
     86        self.meta_file.write("Start: " + repr(position).rjust(10) + " | Length: " + repr(length).rjust(10) + " | Gap: " + repr(gap).rjust(10) + "\n")
    7287
    7388    def expectedUniqueSymbolsInfo(self, length, num_occurrences, num_unique_syms):
    74         self.logFileHandle.write("Length " + str(length)
     89        self.meta_file.write("Length " + str(length)
    7590                       + "\nNumber of occurrences: " + str(num_occurrences)
    7691                       + "\nNumber of unique symbols: " + str(num_unique_syms)
     
    7893        for i in range (len(unique_symbols)):
    7994            if len(unique_symbols[i]) == length:
    80                 self.logFileHandle.write ("\n\tSymbol \'" + unique_symbols[i] + "\'"
     95                self.meta_file.write ("\n\tSymbol \'" + unique_symbols[i] + "\'"
    8196                                   + "\n\tNumber of occurrences: " + str(unique_symbol_counts[i]))
    82         self.logFileHandle.write("\n")
     97        self.meta_file.write("\n")
    8398
    8499    def appendAvgGapMaxGap(self,average_gap, max_gap):
    85100        self.metadataMsg("")
    86         self.logFileHandle.write("Average gap distance: " + str(average_gap) + "\nMaximum gap distance: " + str(max_gap))
     101        self.meta_file.write("Average gap distance: " + str(average_gap) + "\nMaximum gap distance: " + str(max_gap))
    87102
    88103    def metadataMsg(self,string):
    89         self.logFileHandle.write("\n----------\n" + string +"\n")
     104        self.meta_file.write("\n----------\n" + string +"\n")
    90105           
    91106    def close(self):
    92         self.logFileHandle.close()
    93 
    94 def genLowerCaseCharactersList():
    95     return ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
    96 
    97 def genUpperCaseCharactersList():
    98     return ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
    99 
    100 def genDigitCharactersList():
    101     return ['0','1','2','3','4','5','6','7','8','9']
    102 
    103 def genSpecialCharactersList():
    104     return ['_',':','-','.']
     107        self.meta_file.close()
    105108
    106109def genUniqueSymbol(length, num_unique_sym):
    107110    new_symbols = []
    108111    while (len(new_symbols) < num_unique_sym):
    109         tempStr = ""
    110         for j in range (length):
     112        tempStr = ""
     113        for j in range (length):
    111114            sym_lst = genDigitCharactersList() + genLowerCaseCharactersList() + genUpperCaseCharactersList() + genSpecialCharactersList() # XML-like characters
    112115            index = random.randint (0, len(sym_lst)-1)
    113             tempStr += sym_lst[index]
    114         #search for duplicates
    115         if (new_symbols.count(tempStr) == 0):
    116             new_symbols.append(tempStr)   
     116            tempStr += sym_lst[index]
     117        #search for duplicates
     118        if (new_symbols.count(tempStr) == 0):
     119            new_symbols.append(tempStr)
    117120    unique_symbols.extend(new_symbols)
    118121
    119 # NOTE: This function assumes that num_occurences > num_unique_sym
     122# WARNING: PRECONDITION: (num_occurences > num_unique_sym)
    120123def genDistributionUNIFORM(num_occurences, num_unique_sym):
    121124    k = num_occurences/num_unique_sym
     
    124127    unique_symbol_counts.append(k+(num_occurences % k))
    125128   
    126 # NOTE: This function assumes that num_occurences > num_unique_sym
    127 def genDistributionRANDOM(num_occurences, num_unique_sym):
    128     upperbound = num_occurences-num_unique_sym + 1
    129     for i in range (num_unique_sym-1):
    130         unique_symbol_counts.append(random.randint(1,upperbound))
    131         num_occurences = num_occurences - unique_symbol_counts[len(unique_symbol_counts)-1]
    132         upperbound = num_occurences - (num_unique_sym - i - 2)
    133     unique_symbol_counts.append(num_occurences)
     129# WARNING: PRECONDITION: (num_occurences > num_unique_sym)
     130#def genDistributionRANDOM(num_occurences, num_unique_sym):
     131#    upperbound = num_occurences-num_unique_sym + 1
     132#    for i in range (num_unique_sym-1):
     133#        unique_symbol_counts.append(random.randint(1,upperbound))
     134#        num_occurences = num_occurences - unique_symbol_counts[len(unique_symbol_counts)-1]
     135#        upperbound = num_occurences - (num_unique_sym - i - 2)
     136#    unique_symbol_counts.append(num_occurences)
    134137
    135138def genSymbols():
     
    181184                                    #print avg_gap - i
    182185
    183 
    184 
    185186        gap = gaps.pop(random.randint(0, len(gaps) - 1))
    186187        gap_total += gap
     
    202203
    203204def main(*arguments):
     205
    204206    num_args = len(arguments)
    205     print num_args, arguments
    206207    if num_args < 3:
    207208        print usage
     
    217218            sys.exit()
    218219
    219         # parse unique symbol distributions
    220         for i in range (num_args-2):
    221             unique_symbol_dist_str = arguments[i]
    222             lst = eval(unique_symbol_dist_str)
    223             symbol_specs.append(lst)
     220        # parse unique symbol distributions
     221        for i in range (num_args-2):
     222            unique_symbol_dist_str = arguments[i]
     223            lst = eval(unique_symbol_dist_str)
     224            symbol_specs.append(lst)
    224225
    225226        print "Generating test file . . . "
     
    237238         
    238239        # construct logger instance
    239         symbol_file_generator = SymbolFileGenerator(base_filename)
     240        symbol_file_generator = TestFileGenerator(base_filename)
    240241        symbol_logger = SymbolLogger(base_filename, unique_symbol_total, avg_gap, max_gap)
    241242               
     
    246247            # generate the unique symbols and the distribution of each of the symbols
    247248            genUniqueSymbol(symbol_spec[0], symbol_spec[2])
    248             genDistributionUNIFORM(symbol_spec[1], symbol_spec[2])
    249             symbol_logger.expectedUniqueSymbolsInfo(symbol_spec[0], symbol_spec[1], symbol_spec[2])
     249            genDistributionUNIFORM(symbol_spec[1], symbol_spec[2])
     250            symbol_logger.expectedUniqueSymbolsInfo(symbol_spec[0], symbol_spec[1], symbol_spec[2])
    250251            total_symbols += symbol_spec[1]
    251252
     
    263264        symbol_logger.close()
    264265
    265         print "Done generating test file"
     266        print "Done generating test file."
    266267
    267268        # reset shared variables
Note: See TracChangeset for help on using the changeset viewer.