Changeset 416
- Timestamp:
- 06/26/10 12:37:34 (3 years ago)
- Location:
- proto/parabix2
- Files:
-
- 2 modified
-
byteclass.py (modified) (5 diffs)
-
parabix2.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
proto/parabix2/byteclass.py
r277 r416 7 7 # 8 8 # Robert D. Cameron 9 # August 14, 2009 10 # 9 # June 26, 2010 - Add definitions for lex.ASCII_name_char, 10 # lex.ASCII_name_start, lex.colon 11 # 11 12 12 13 class u8_streams (): … … 46 47 class lexical_streams (): 47 48 RefStart = 0 48 Semicolon = 0 49 Semicolon = 0 50 Colon = 0 49 51 LAngle = 0 50 52 RAngle = 0 … … 60 62 Hash = 0 61 63 x = 0 64 ASCII_name_start = 0 65 ASCII_name_char = 0 62 66 NameScan = 0 63 67 Digit = 0 … … 120 124 temp28 = (temp9 &~ temp13); 121 125 lex.x = (temp27 & temp28); 122 temp29 = (temp18 & temp23); 123 temp30 = (temp29 | lex.Hyphen); 124 temp31 = (temp3 & temp15); 125 temp32 = (temp30 | temp31); 126 temp29 = (temp9 & temp5); 127 lex.Colon = (temp8 & temp29); 128 temp30 = (temp18 & temp23); 129 temp31 = (temp30 | lex.Colon); 130 temp32 = (temp16 &~ bit[2]); 126 131 temp33 = (bit[5] | temp10); 127 132 temp34 = (bit[4] & temp33); 128 temp35 = (temp8 &~ temp34); 129 temp36 = (temp32 | temp35); 130 temp37 = (temp16 &~ bit[2]); 131 temp38 = (~temp34); 132 temp39 = (temp21 | temp13); 133 temp40 = ((bit[3] & temp38)|(~(bit[3]) & temp39)); 134 temp41 = (temp37 & temp40); 135 temp42 = (temp36 | temp41); 136 temp43 = (temp16 & bit[2]); 137 temp44 = (temp43 & temp40); 138 temp45 = (temp42 | temp44); 139 lex.NameScan = (temp45 | bit[0]); 140 temp46 = (temp1 | bit[2]); 141 control.x00_x1F = (~temp46); 142 temp47 = (bit[2] | bit[3]); 143 temp48 = (temp1 | temp47); 144 control.CR = (temp20 &~ temp48); 145 temp49 = (temp9 & temp5); 146 control.LF = (temp49 &~ temp48); 147 temp50 = (temp9 & temp19); 148 control.HT = (temp50 &~ temp48); 149 control.SP = (temp3 &~ temp39); 150 temp51 = (temp20 | temp49); 151 temp52 = (temp51 | temp50); 152 temp53 = (temp52 &~ temp48); 153 lex.WS = (temp53 | control.SP); 154 temp54 = (bit[5] | bit[6]); 155 temp55 = (bit[4] & temp54); 156 lex.Digit = (temp8 &~ temp55); 157 temp56 = (temp16 &~ temp47); 158 temp57 = (temp56 &~ bit[4]); 159 temp58 = (~temp10); 160 temp59 = ((bit[5] & temp58)|(~(bit[5]) & 
temp13)); 161 temp60 = (temp57 & temp59); 162 temp61 = (lex.Digit | temp60); 163 temp62 = (temp16 & temp2); 164 temp63 = (temp62 &~ bit[4]); 165 temp64 = (temp63 & temp59); 166 lex.Hex = (temp61 | temp64); 133 temp35 = (~temp34); 134 temp36 = (temp21 | temp13); 135 temp37 = ((bit[3] & temp35)|(~(bit[3]) & temp36)); 136 temp38 = (temp32 & temp37); 137 temp39 = (temp31 | temp38); 138 temp40 = (temp16 & bit[2]); 139 temp41 = (temp40 & temp37); 140 lex.ASCII_name_start = (temp39 | temp41); 141 temp42 = (temp30 | lex.Hyphen); 142 temp43 = (temp3 & temp15); 143 temp44 = (temp42 | temp43); 144 temp45 = (temp8 &~ temp34); 145 temp46 = (temp44 | temp45); 146 temp47 = (temp46 | temp38); 147 lex.ASCII_name_char = (temp47 | temp41); 148 lex.NameScan = (lex.ASCII_name_char | bit[0]); 149 temp48 = (temp1 | bit[2]); 150 control.x00_x1F = (~temp48); 151 temp49 = (bit[2] | bit[3]); 152 temp50 = (temp1 | temp49); 153 control.CR = (temp20 &~ temp50); 154 control.LF = (temp29 &~ temp50); 155 temp51 = (temp9 & temp19); 156 control.HT = (temp51 &~ temp50); 157 control.SP = (temp3 &~ temp36); 158 temp52 = (temp20 | temp29); 159 temp53 = (temp52 | temp51); 160 temp54 = (temp53 &~ temp50); 161 lex.WS = (temp54 | control.SP); 162 temp55 = (bit[5] | bit[6]); 163 temp56 = (bit[4] & temp55); 164 lex.Digit = (temp8 &~ temp56); 165 temp57 = (temp16 &~ temp49); 166 temp58 = (temp57 &~ bit[4]); 167 temp59 = (~temp10); 168 temp60 = ((bit[5] & temp59)|(~(bit[5]) & temp13)); 169 temp61 = (temp58 & temp60); 170 temp62 = (lex.Digit | temp61); 171 temp63 = (temp16 & temp2); 172 temp64 = (temp63 &~ bit[4]); 173 temp65 = (temp64 & temp60); 174 lex.Hex = (temp62 | temp65); 167 175 u8.unibyte = (~bit[0]); 168 176 u8.prefix = (bit[0] & bit[1]); … … 171 179 u8.prefix4 = (u8.prefix & temp7); 172 180 u8.suffix = (bit[0] &~ bit[1]); 173 temp65 = (u8.prefix &~ temp47); 174 temp66 = (temp21 | bit[6]); 175 temp67 = (temp65 &~ temp66); 176 temp68 = (bit[5] & temp13); 177 temp69 = (bit[4] | temp68); 178 temp70 =
(u8.prefix4 & temp69); 179 u8.badprefix = (temp67 | temp70); 180 u8.xE0 = (u8.prefix3 &~ temp39); 181 temp66 = (u8.prefix &~ temp49); 182 temp67 = (temp21 | bit[6]); 183 temp68 = (temp66 &~ temp67); 184 temp69 = (bit[5] & temp13); 185 temp70 = (bit[4] | temp69); 186 temp71 = (u8.prefix4 & temp70); 187 u8.badprefix = (temp68 | temp71); 188 u8.xE0 = (u8.prefix3 &~ temp36); 181 189 u8.xED = (u8.prefix3 & temp20); 182 u8.xF0 = (u8.prefix4 &~ temp39); 183 temp71 = (temp4 &~ temp13); 184 u8.xF4 = (u8.prefix4 & temp71); 190 u8.xF0 = (u8.prefix4 &~ temp36); 191 temp72 = (temp4 &~ temp13); 192 u8.xF4 = (u8.prefix4 & temp72); 185 193 u8.xA0_xBF = (u8.suffix & bit[2]); 186 194 u8.x80_x9F = (u8.suffix &~ bit[2]); 187 u8.x90_xBF = (u8.suffix & temp47); 188 u8.x80_x8F = (u8.suffix &~ temp47); 195 u8.x90_xBF = (u8.suffix & temp49); 196 u8.x80_x8F = (u8.suffix &~ temp49); 189 197 u8.xEF = (u8.prefix3 & temp23); 190 temp72 = (u8.suffix & temp7); 191 u8.xBF = (temp72 & temp23); 192 u8.xBE = (temp72 & temp15); 198 temp73 = (u8.suffix & temp7); 199 u8.xBF = (temp73 & temp23); 200 u8.xBE = (temp73 & temp15); 201 193 202 return (u8, control, lex) 194 203 -
proto/parabix2/parabix2.py
r411 r416 554 554 555 555 556 # 557 # Fully validate ASCII-based names and identify non-ASCII 558 # positions within names. 559 560 def prevalidate_names(lex, name_stream, nmtoken_stream): 561 """Fully validate ASCII-based names and identify non-ASCII positions within names. 562 563 >>> demo_prevalidate_names("<good -bad='hyphen'/><_OK/><:funny:butOK/><1problem a='b' d423='x'>") 564 input data: <good -bad='hyphen'/><_OK/><:funny:butOK/><1problem a='b' d423='x'> 565 names : _1111_1111____________111___111111111111___11111111_1_____1111_____ 566 name_check: ______1____________________________________1_______________________ 567 """ 568 name_check = name_stream &~ bitutil.Advance(name_stream) & ~lex.ASCII_name_start 569 name_check |= (name_stream | nmtoken_stream) & ~lex.ASCII_name_char 570 return name_check 571 572 def demo_prevalidate_names(u8data): 573 lgth = len(u8data) 574 (bit, EOF_mask) = bitutil.transpose_streams(u8data) 575 (u8, control, lex) = byteclass.classify_bytes(bit) 576 lex = add_multiliterals(lex) 577 markup1 = parse_CtCDPI(lex, EOF_mask) 578 callouts = parse_tags(lex, markup1.CtCDPI_mask, EOF_mask) 579 name_stream = callouts.ElemNames | callouts.AttNames 580 name_check = prevalidate_names(lex, name_stream, 0) 581 bitutil.print_aligned_u8_byte_streams([('input data', u8data), 582 ('names', bitutil.bitstream2string(name_stream, lgth)), 583 ('name_check', bitutil.bitstream2string(name_check, lgth))]) 556 584 557 585 def parabix_parse(u8data):
