Changeset 4462
- Timestamp: Feb 5, 2015, 7:34:28 PM (4 years ago)
- Location: proto/charsetcompiler
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
proto/charsetcompiler/UCD/UCD_parser.py
r4387 r4462 208 208 if not v in name_list_order: 209 209 #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v)) 210 print("Warning: property %s has no instance of value %s" % (property_code, v))210 #print("Warning: property %s has no instance of value %s" % (property_code, v)) 211 211 value_map[v] = empty_uset() 212 212 name_list_order.append(v) -
proto/charsetcompiler/generate_UCD_tests.py
r4457 r4462 61 61 62 62 def load_all(self): 63 # First parse all property names and their aliases 64 self.load_property_name_info() 65 # 66 # Next parse all property value names and their aliases. Generate the data. 67 self.load_property_value_info() 68 # 69 # The Block property 70 self.load_enumerated_property_data('Blocks', 'blk') 71 # 72 # Scripts 73 self.load_enumerated_property_data('Scripts', 'sc') 74 # 75 # Script Extensions 76 self.load_ScriptExtensions_data() 77 # 78 # General Category 79 self.load_enumerated_property_data('extracted/DerivedGeneralCategory', 'gc') 80 # 81 # Core Properties 82 self.load_binary_properties_data('DerivedCoreProperties') 83 # 84 self.load_binary_properties_data('PropList') 85 63 # First parse all property names and their aliases 64 self.load_property_name_info() 65 # 66 # Next parse all property value names and their aliases. Generate the data. 67 self.load_property_value_info() 68 # 69 # The Block property 70 self.load_enumerated_property_data('Blocks', 'blk') 71 # 72 # Scripts 73 self.load_enumerated_property_data('Scripts', 'sc') 74 # 75 # Script Extensions 76 self.load_ScriptExtensions_data() 77 # 78 # General Category 79 self.load_enumerated_property_data('extracted/DerivedGeneralCategory', 'gc') 80 # 81 # Core Properties 82 self.load_binary_properties_data('DerivedCoreProperties') 83 # 84 self.load_binary_properties_data('PropList') 86 85 self.load_others() 87 86 88 def generate_level_1_property_terms(self ):87 def generate_level_1_property_terms(self, negated_per_10 = 5, propgroups=['others', 'sc', 'scx', 'gc']): 89 88 template = r"""<grepcase regexp="^\%s{%s}$" datafile="All_good" grepcount="%i"/>""" 90 89 terms = [] 91 for p in self.others: 92 s = self.binary_value_map[p] 93 lbl = 'p' 94 neg = randint(0,1) 95 if neg == 1: 96 s = uset_complement(s) 97 lbl = 'P' 98 terms.append(template % (lbl, p, uset_popcount(uset_intersection(self.all_good_set, s)))) 99 for v in self.property_value_list['gc']: 100 s = 
self.enum_value_map['gc'][v] 101 lbl = 'p' 102 neg = randint(0,1) 103 if neg == 1: 104 s = uset_complement(s) 105 lbl = 'P' 106 terms.append(template % (lbl, v, uset_popcount(uset_intersection(self.all_good_set, s)))) 107 for v in self.property_value_list['sc']: 108 s = self.enum_value_map['sc'][v] 109 lbl = 'p' 110 neg = randint(0,1) 111 if neg == 1: 112 s = uset_complement(s) 113 lbl = 'P' 114 terms.append(template % (lbl, v, uset_popcount(uset_intersection(self.all_good_set, s)))) 115 for v in self.property_value_list['sc']: 116 s = self.enum_value_map['scx'][v] 117 lbl = 'p' 118 neg = randint(0,1) 119 if neg == 1: 120 s = uset_complement(s) 121 lbl = 'P' 122 terms.append(template % (lbl, "scx=" + v, uset_popcount(uset_intersection(self.all_good_set, s)))) 90 if 'others' in propgroups: 91 for p in self.others: 92 s = self.binary_value_map[p] 93 lbl = 'p' 94 if randint(1,10) <= negated_per_10: 95 s = uset_complement(s) 96 lbl = 'P' 97 terms.append(template % (lbl, p, uset_popcount(uset_intersection(self.all_good_set, s)))) 98 if 'gc' in propgroups: 99 for v in self.property_value_list['gc']: 100 s = self.enum_value_map['gc'][v] 101 lbl = 'p' 102 if randint(1,10) <= negated_per_10: 103 s = uset_complement(s) 104 lbl = 'P' 105 terms.append(template % (lbl, v, uset_popcount(uset_intersection(self.all_good_set, s)))) 106 if 'sc' in propgroups: 107 for v in self.property_value_list['sc']: 108 s = self.enum_value_map['sc'][v] 109 vname = self.property_value_full_name_map['sc'][v] 110 lbl = 'p' 111 if randint(1,10) <= negated_per_10: 112 s = uset_complement(s) 113 lbl = 'P' 114 terms.append(template % (lbl, vname, uset_popcount(uset_intersection(self.all_good_set, s)))) 115 if 'scx' in propgroups: 116 for v in self.property_value_list['sc']: 117 s = self.enum_value_map['scx'][v] 118 vname = self.property_value_full_name_map['sc'][v] 119 lbl = 'p' 120 if randint(1,10) <= negated_per_10: 121 s = uset_complement(s) 122 lbl = 'P' 123 terms.append(template % (lbl, "scx=" + 
vname, uset_popcount(uset_intersection(self.all_good_set, s)))) 123 124 return terms 124 125 125 def random_binary(self, a1, a2): 126 (p1, t1) = a1 127 (p2, t2) = a2 128 neg1 = randint(0,1) 129 neg2 = randint(0,1) 130 op = randint(0,2) 131 s1 = self.enum_value_map[p1][t1] 132 if p2 == 'others': 126 def random_binary(self, a1, a2, useLookbehindAssertions = False): 127 (p1, t1) = a1 128 (p2, t2) = a2 129 op = randint(0,2) 130 s1 = self.enum_value_map[p1][t1] 131 if p2 == 'others': 133 132 s2 = self.binary_value_map[t2] 134 133 else: s2 = self.enum_value_map[p2][t2] 135 if neg1 == 1: s1 = uset_complement(s1)136 if neg2 == 1: s2 = uset_complement(s2)137 134 if op == 0: s3 = uset_intersection(s1, s2) 138 135 elif op == 1: s3 = uset_difference(s1, s2) 139 136 elif op == 2: s3 = uset_union(s1, s2) 140 137 s3 = uset_intersection(s3, self.all_good_set) 138 if p1 == 'sc' or p1 == 'scx': t1 = self.property_value_full_name_map['sc'][t1] 139 if p2 == 'sc' or p2 == 'scx': t2 = self.property_value_full_name_map['sc'][t2] 141 140 if p1 == 'scx': t1 = 'scx=' + t1 142 141 if p2 == 'scx': t2 = 'scx=' + t2 143 v1 = "\\%s{%s}" % (['p', 'P'][neg1], t1) 144 v2 = "\\%s{%s}" % (['p', 'P'][neg2], t2) 145 opr = ["&&", "--", ""][op] 146 return r"""<grepcase regexp="^[%s%s%s]$" datafile="All_good" grepcount="%i"/>""" % (v1, opr, v2, uset_popcount(s3)) 142 v1 = "\\p{%s}" % (t1) 143 v2 = "\\p{%s}" % (t2) 144 if not useLookbehindAssertions: 145 opr = ["&&", "--", ""][op] 146 return r"""<grepcase regexp="^[%s%s%s]$" datafile="All_good" grepcount="%i"/>""" % (v1, opr, v2, uset_popcount(s3)) 147 if op == 0: 148 return r"""<grepcase regexp="^%s(?<=%s)$" datafile="All_good" grepcount="%i"/>""" % (v1, v2, uset_popcount(s3)) 149 elif op == 1: 150 return r"""<grepcase regexp="^%s(?<!%s)$" datafile="All_good" grepcount="%i"/>""" % (v1, v2, uset_popcount(s3)) 151 else: 152 return r"""<grepcase regexp="^[%s%s]$" datafile="All_good" grepcount="%i"/>""" % (v1, v2, uset_popcount(s3)) 147 153 148 def 
generate_random_property_expressions(self ):154 def generate_random_property_expressions(self, useLookbehindAssertions = False): 149 155 gc = self.property_value_list['gc'] 150 156 sc = self.property_value_list['sc'] … … 153 159 for p in gc: 154 160 s = sc[randint(0, len(sc)-1)] 155 exprs.append(self.random_binary(('gc', p), ('sc', s) ))156 sx = sc[randint(0, len(sc)-1)]157 exprs.append(self.random_binary(('gc', p), ('scx', sx)))158 othr = others[randint(0, len(others)-1)]159 exprs.append(self.random_binary(('gc', p), ('others', othr)))161 exprs.append(self.random_binary(('gc', p), ('sc', s), useLookbehindAssertions)) 162 #sx = sc[randint(0, len(sc)-1)] 163 #exprs.append(self.random_binary(('gc', p), ('scx', sx), useLookbehindAssertions)) 164 #othr = others[randint(0, len(others)-1)] 165 #exprs.append(self.random_binary(('gc', p), ('others', othr), useLookbehindAssertions)) 160 166 for p in sc: 161 167 g = gc[randint(0, len(gc)-1)] 162 exprs.append(self.random_binary(('sc', p), ('gc', g) ))163 sx = sc[randint(0, len(sc)-1)]164 exprs.append(self.random_binary(('sc', p), ('scx', sx)))165 othr = others[randint(0, len(others)-1)]166 exprs.append(self.random_binary(('sc', p), ('others', othr)))167 for p in others:168 s = sc[randint(0, len(sc)-1)]169 exprs.append(self.random_binary(('sc', s), ('others', p)))170 sx = sc[randint(0, len(sc)-1)]171 exprs.append(self.random_binary(('scx', sx), ('others', p)))168 exprs.append(self.random_binary(('sc', p), ('gc', g), useLookbehindAssertions)) 169 #sx = sc[randint(0, len(sc)-1)] 170 #exprs.append(self.random_binary(('sc', p), ('scx', sx), useLookbehindAssertions)) 171 #othr = others[randint(0, len(others)-1)] 172 #exprs.append(self.random_binary(('sc', p), ('others', othr), useLookbehindAssertions)) 173 #for p in others: 174 #s = sc[randint(0, len(sc)-1)] 175 #exprs.append(self.random_binary(('sc', s), ('others', p), useLookbehindAssertions)) 176 #sx = sc[randint(0, len(sc)-1)] 177 #exprs.append(self.random_binary(('scx', sx), 
('others', p), useLookbehindAssertions)) 172 178 return exprs 173 179 … … 175 181 ucd = UCD_test_generator() 176 182 ucd.load_all() 177 for t in ucd.generate_level_1_property_terms(): 183 print "<greptest>" 184 for t in ucd.generate_level_1_property_terms(1, ['sc', 'gc']): 178 185 print t 179 for p in ucd.generate_random_property_expressions( ):186 for p in ucd.generate_random_property_expressions(True): 180 187 print p 188 print "</greptest>" 181 189 182 190 if __name__ == "__main__":
Note: See TracChangeset for help on using the changeset viewer.