Changeset 2075
- Timestamp: May 8, 2012, 11:47:39 AM (7 years ago)
- Location: trunk/symbol_table/test
- Files: 1 added, 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/symbol_table/test/st_test_file_generator.py
r2070 r2075 3 3 import math 4 4 5 # This script generates 3 files. 6 # 7 # 1. Output Test File 8 # 9 # L_O_U_L_O_U_L_O_U_Ag_Mg.test, where for each (L_O_U), 5 # (i) Test Files 6 # 7 # [L_O_U]+_Ag_Mg.test, where for each triple (L,O,U) 10 8 # 11 # * UNIFORM denotes that equal counts of symbols of each length L are generated 12 # * L denotes length 13 # * O denotes the total number of symbols of length L 14 # * U denotes the number of unique symbols of length L 9 # L - length 10 # O - total occurences of length L symbols 11 # U - unique occurences of length L symbols 15 12 # 16 13 # and … … 19 16 # * Mg denotes the maximum absolute deviation gap distance, see Absolute Deviation 20 17 # 21 # 2. Meta Results File22 # 23 # L_O_U_L_O_U_L_O_U_Ag_Mg.meta18 # (ii) Test Meta Files 19 # 20 # [L_O_U]+_Ag_Mg.meta 24 21 # 25 # This file contains start position, length and gap distance for each symbol contains in the generated test file, as26 # well as the average gap distance and the maximum gap distance.27 # 22 # Contains test file meta data, i.e. start position, 23 # length and gap distance for each symbol contained 24 # in the generated test file. 28 25 29 26 usage = "python testFileGenerator.py [L,O,U] [L,O,U] [L,O,U] ... AvgGap MaxGap" 30 27 31 # parallel lists unique_symbols and unique_symbol_counts 32 # list of unique symbols 28 # parallel lists 33 29 unique_symbols = [] 34 # list of counts of the number of occurrences for each unique symbol35 30 unique_symbol_counts = [] 36 31 37 # a list that contains symbol specification[[L,O,U], [L,O,U], [L,O,U], ...]32 # symbol specs [[L,O,U], [L,O,U], [L,O,U], ...] 
38 33 symbol_specs = [] 39 34 40 35 unique_sym_pos = [] 41 36 42 class SymbolFileGenerator: 43 testFileHandle = 0 37 ################################################################################ 38 # Symbol Characters 39 ################################################################################ 40 def genLowerCaseCharactersList(): 41 return ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] 42 43 def genUpperCaseCharactersList(): 44 return ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'] 45 46 def genDigitCharactersList(): 47 return ['0','1','2','3','4','5','6','7','8','9'] 48 49 def genSpecialCharactersList(): 50 return ['_',':','-','.'] 51 52 delim = ',' 53 char_pool = genDigitCharactersList() + genLowerCaseCharactersList() + genUpperCaseCharactersList() + genSpecialCharactersList() # XML-like characters 54 55 class TestFileGenerator: 56 test_file = None 44 57 def __init__(self,base_filename): 45 self.testFileHandle = open(base_filename+".test", 'w') 46 47 def appendUniqueSymbolAndGap(self, symbol_string, gap): 48 self.testFileHandle.write(symbol_string + ','*gap) 58 self.test_file = open(base_filename+".test", 'w') 59 60 def appendUniqueSymbolAndGap(self, symbol_string, gap): 61 global delim 62 delims = delim*gap 63 self.test_file.write(symbol_string + delims) 49 64 50 65 def close(self): 51 self.testFileHandle.close()66 self.test_file.close() 52 67 53 68 54 69 class SymbolLogger: 55 logFileHandle = 070 meta_file = 0 56 71 57 72 def __init__(self,base_filename,unique_symbol_total,avg_gap, max_gap): … … 61 76 unique_sym_pos.append([]) 62 77 63 self. 
logFileHandle = open (base_filename+".meta", 'w')64 self.logFileHandle.write("Average Gap Distance: " + str(avg_gap) + "\nMaximum Gap Distance: " + str(max_gap))78 self.meta_file = open (base_filename+".meta", 'w') 79 self.meta_file.write("Average Gap Distance: " + str(avg_gap) + "\nMaximum Gap Distance: " + str(max_gap)) 65 80 66 81 def logUniqueSymbolPositions(self, index): 67 self.logFileHandle.write("Symbol \'" + unique_symbols[index] + "\' appeared at positions: \n")68 self.logFileHandle.write(str(unique_sym_pos[index]) + "\n")82 self.meta_file.write("Symbol \'" + unique_symbols[index] + "\' appeared at positions: \n") 83 self.meta_file.write(str(unique_sym_pos[index]) + "\n") 69 84 70 85 def actualUniqueSymbolInfo(self,length,position,gap): 71 self.logFileHandle.write("Start: " + repr(position).rjust(10) + " | Length: " + repr(length).rjust(10) + " | Gap: " + repr(gap).rjust(10) + "\n")86 self.meta_file.write("Start: " + repr(position).rjust(10) + " | Length: " + repr(length).rjust(10) + " | Gap: " + repr(gap).rjust(10) + "\n") 72 87 73 88 def expectedUniqueSymbolsInfo(self, length, num_occurrences, num_unique_syms): 74 self.logFileHandle.write("Length " + str(length)89 self.meta_file.write("Length " + str(length) 75 90 + "\nNumber of occurrences: " + str(num_occurrences) 76 91 + "\nNumber of unique symbols: " + str(num_unique_syms) … … 78 93 for i in range (len(unique_symbols)): 79 94 if len(unique_symbols[i]) == length: 80 self.logFileHandle.write ("\n\tSymbol \'" + unique_symbols[i] + "\'"95 self.meta_file.write ("\n\tSymbol \'" + unique_symbols[i] + "\'" 81 96 + "\n\tNumber of occurrences: " + str(unique_symbol_counts[i])) 82 self.logFileHandle.write("\n")97 self.meta_file.write("\n") 83 98 84 99 def appendAvgGapMaxGap(self,average_gap, max_gap): 85 100 self.metadataMsg("") 86 self.logFileHandle.write("Average gap distance: " + str(average_gap) + "\nMaximum gap distance: " + str(max_gap))101 self.meta_file.write("Average gap distance: " + str(average_gap) 
+ "\nMaximum gap distance: " + str(max_gap)) 87 102 88 103 def metadataMsg(self,string): 89 self.logFileHandle.write("\n----------\n" + string +"\n")104 self.meta_file.write("\n----------\n" + string +"\n") 90 105 91 106 def close(self): 92 self.logFileHandle.close() 93 94 def genLowerCaseCharactersList(): 95 return ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] 96 97 def genUpperCaseCharactersList(): 98 return ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'] 99 100 def genDigitCharactersList(): 101 return ['0','1','2','3','4','5','6','7','8','9'] 102 103 def genSpecialCharactersList(): 104 return ['_',':','-','.'] 107 self.meta_file.close() 105 108 106 109 def genUniqueSymbol(length, num_unique_sym): 107 110 new_symbols = [] 108 111 while (len(new_symbols) < num_unique_sym): 109 110 112 tempStr = "" 113 for j in range (length): 111 114 sym_lst = genDigitCharactersList() + genLowerCaseCharactersList() + genUpperCaseCharactersList() + genSpecialCharactersList() # XML-like characters 112 115 index = random.randint (0, len(sym_lst)-1) 113 114 115 116 new_symbols.append(tempStr) 116 tempStr += sym_lst[index] 117 #search for duplicates 118 if (new_symbols.count(tempStr) == 0): 119 new_symbols.append(tempStr) 117 120 unique_symbols.extend(new_symbols) 118 121 119 # NOTE: This function assumes that num_occurences > num_unique_sym122 # WARNING: PRECONDITION: (num_occurences > num_unique_sym) 120 123 def genDistributionUNIFORM(num_occurences, num_unique_sym): 121 124 k = num_occurences/num_unique_sym … … 124 127 unique_symbol_counts.append(k+(num_occurences % k)) 125 128 126 # NOTE: This function assumes that num_occurences > num_unique_sym127 def genDistributionRANDOM(num_occurences, num_unique_sym):128 upperbound = num_occurences-num_unique_sym + 1129 for i in range (num_unique_sym-1):130 unique_symbol_counts.append(random.randint(1,upperbound))131 
num_occurences = num_occurences - unique_symbol_counts[len(unique_symbol_counts)-1]132 upperbound = num_occurences - (num_unique_sym - i - 2)133 unique_symbol_counts.append(num_occurences)129 # WARNING: PRECONDITION: (num_occurences > num_unique_sym) 130 #def genDistributionRANDOM(num_occurences, num_unique_sym): 131 # upperbound = num_occurences-num_unique_sym + 1 132 # for i in range (num_unique_sym-1): 133 # unique_symbol_counts.append(random.randint(1,upperbound)) 134 # num_occurences = num_occurences - unique_symbol_counts[len(unique_symbol_counts)-1] 135 # upperbound = num_occurences - (num_unique_sym - i - 2) 136 # unique_symbol_counts.append(num_occurences) 134 137 135 138 def genSymbols(): … … 181 184 #print avg_gap - i 182 185 183 184 185 186 gap = gaps.pop(random.randint(0, len(gaps) - 1)) 186 187 gap_total += gap … … 202 203 203 204 def main(*arguments): 205 204 206 num_args = len(arguments) 205 print num_args, arguments206 207 if num_args < 3: 207 208 print usage … … 217 218 sys.exit() 218 219 219 220 221 222 223 220 # parse unique symbol distributions 221 for i in range (num_args-2): 222 unique_symbol_dist_str = arguments[i] 223 lst = eval(unique_symbol_dist_str) 224 symbol_specs.append(lst) 224 225 225 226 print "Generating test file . . . 
" … … 237 238 238 239 # construct logger instance 239 symbol_file_generator = SymbolFileGenerator(base_filename)240 symbol_file_generator = TestFileGenerator(base_filename) 240 241 symbol_logger = SymbolLogger(base_filename, unique_symbol_total, avg_gap, max_gap) 241 242 … … 246 247 # generate the unique symbols and the distribution of each of the symbols 247 248 genUniqueSymbol(symbol_spec[0], symbol_spec[2]) 248 249 249 genDistributionUNIFORM(symbol_spec[1], symbol_spec[2]) 250 symbol_logger.expectedUniqueSymbolsInfo(symbol_spec[0], symbol_spec[1], symbol_spec[2]) 250 251 total_symbols += symbol_spec[1] 251 252 … … 263 264 symbol_logger.close() 264 265 265 print "Done generating test file "266 print "Done generating test file." 266 267 267 268 # reset shared variables
Note: See TracChangeset for help on using the changeset viewer.