Ignore:
Timestamp:
Mar 14, 2014, 11:04:47 PM (5 years ago)
Author:
ksherdy
Message:

Minor changes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/pabloj/trunk/input/test/pabloS/test.pablos

    r3325 r3681  
    1 1
    2 2
    3 struct Basis_bits{
     3struct Basis{
    4 4    stream bit_0;
    5 5    stream bit_1;
     
    12 12};
    13 13
    14 struct U8{
    15     stream unibyte;
    16     stream prefix;
    17     stream prefix2;
    18     stream prefix3;
    19     stream prefix4;
    20     stream suffix;
    21     stream badprefix;
    22     stream xE0;
    23     stream xED;
    24     stream xF0;
    25     stream xF4;
    26     stream xA0_xBF;
    27     stream x80_x9F;
    28     stream x90_xBF;
    29     stream x80_x8F;
    30     stream xEF;
    31     stream xBF;
    32     stream xBE;
    33     stream scope22;
    34     stream scope32;
    35     stream scope33;
    36     stream scope42;
    37     stream scope43;
    38     stream scope44;
    39     stream xE0_scope;
    40     stream xED_scope;
    41     stream xF0_scope;
    42     stream xF4_scope;
    43     stream xEF_scope;
     14struct Lex{
     15    stream a;
     16    stream p;
     17    stream l;
     18    stream e;
     19    stream LF;
    44 20};
    45 21
    46 struct Lex{
    47     stream CR;
    48     stream LF;
    49     stream HT;
    50     stream SP;
    51     stream CRLF;
    52     stream RefStart;
    53     stream Semicolon;
    54     stream Colon;
    55     stream LAngle;
    56     stream RAngle;
    57     stream LBracket;
    58     stream RBracket;
    59     stream Exclam;
    60     stream QMark;
    61     stream Hyphen;
    62     stream Equals;
    63     stream SQuote;
    64     stream DQuote;
    65     stream Slash;
    66     stream Hash;
    67     stream x;
    68     stream ASCII_name_start;
    69     stream ASCII_name_char;
    70     stream NameScan;
    71     stream Digit;
    72     stream Hex;
    73     stream WS;
     22struct Matches{
     23    stream all_matched;
    74 24};
    75 25
    76 struct Marker{
    77     stream LAngle_scope;
    78     stream Ref_opener;
    79     stream CD_closer;
     26struct Lines{
     27    stream all_matched;
    80 28};
    81 29
    82 struct CtCDPI_Callouts{
    83     stream Ct_starts;
    84     stream Ct_ends;
    85     stream CD_starts;
    86     stream CD_ends;
    87     stream PI_starts;
    88     stream PI_name_starts;
    89     stream PI_name_ends;
    90     stream PI_ends;
     30struct Output{
     31    stream lines;
    91 32};
    92 33
    93 struct Ref_Callouts{
    94     stream GenRef_starts;
    95     stream GenRef_ends;
    96     stream DecRef_starts;
    97     stream DecRef_ends;
    98     stream HexRef_starts;
    99     stream HexRef_ends;
    100 };
     34function void ClassifyBytes(struct Basis_bits basis_bits, struct Lex lex) {
     35    stream temp1 = (basis_bits.bit_1 & (~ basis_bits.bit_0));
     36    stream temp2 = (basis_bits.bit_2 & (~ basis_bits.bit_3));
     37    stream temp3 = (temp1 & temp2);
     38    stream temp4 = (basis_bits.bit_4 | basis_bits.bit_5);
     39    stream temp5 = (basis_bits.bit_7 & (~ basis_bits.bit_6));
     40    stream temp6 = (temp5 & (~ temp4));
     41    lex.a = (temp3 & temp6);
     42    stream temp7 = (basis_bits.bit_2 & basis_bits.bit_3);
     43    stream temp8 = (temp1 & temp7);
     44    stream temp9 = (basis_bits.bit_6 | basis_bits.bit_7);
     45    stream temp10 = (temp4 | temp9);
     46    lex.p = (temp8 & (~ temp10));
     47    stream temp11 = (basis_bits.bit_4 & basis_bits.bit_5);
     48    stream temp12 = (temp11 & (~ temp9));
     49    lex.l = (temp3 & temp12);
     50    stream temp13 = (basis_bits.bit_5 & (~ basis_bits.bit_4));
     51    stream temp14 = (temp13 & temp5);
     52    lex.e = (temp3 & temp14);
     53    stream temp15 = (basis_bits.bit_0 | basis_bits.bit_1);
     54    stream temp16 = (basis_bits.bit_2 | basis_bits.bit_3);
     55    stream temp17 = (temp15 | temp16);
     56    stream temp18 = (basis_bits.bit_4 & (~ basis_bits.bit_5));
     57    stream temp19 = (basis_bits.bit_6 & (~ basis_bits.bit_7));
     58    stream temp20 = (temp18 & temp19);
     59    lex.LF = (temp20 & (~ temp17));
     60}
    101 61
    102 struct Tag_Callouts{
    103     stream ElemName_starts;
    104     stream ElemName_ends;
    105     stream AttName_starts;
    106     stream AttName_ends;
    107     stream AttVal_starts;
    108     stream AttVal_ends;
    109     stream AttVal_spans;
    110     stream EmptyTag_marks;
    111     stream EndTag_marks;
    112 };
     62function void Match(struct Lex lex, struct Matches matches) {
     63    stream m0 = lex.a;
     64    stream m1 = (pablo.Advance(m0) & lex.p);
     65    stream m2 = (pablo.Advance(m1) & lex.p);
     66    stream m3 = (pablo.Advance(m2) & lex.l);
     67    stream m4 = (pablo.Advance(m3) & lex.e);
     68    matches.all_matched = m4;
     69}
    113 70
    114 struct Check_streams{
    115     stream misc_mask;
    116     stream non_ascii_name_starts;
    117     stream non_ascii_names;
    118     stream tag_marks;
    119     stream name_follows;
    120     stream att_refs;
    121 };
    122 
    123 function void Classify_bytes_Validate_utf8(struct Basis_bits basis_bits, struct Lex lex, struct U8 u8) {
    124     stream temp1 = (basis_bits.bit_0 | basis_bits.bit_1);
    125     stream temp2 = (basis_bits.bit_2 & (~ basis_bits.bit_3));
    126     stream temp3 = (temp2 & (~ temp1));
    127     stream temp4 = (basis_bits.bit_5 & (~ basis_bits.bit_4));
    128     stream temp5 = (basis_bits.bit_6 & (~ basis_bits.bit_7));
    129     stream temp6 = (temp4 & temp5);
    130     lex.RefStart = (temp3 & temp6);
    131     stream temp7 = (basis_bits.bit_2 & basis_bits.bit_3);
    132     stream temp8 = (temp7 & (~ temp1));
    133     stream temp9 = (basis_bits.bit_4 & (~ basis_bits.bit_5));
    134     stream temp10 = (basis_bits.bit_6 & basis_bits.bit_7);
    135     stream temp11 = (temp9 & temp10);
    136     lex.Semicolon = (temp8 & temp11);
    137     stream temp12 = (basis_bits.bit_4 & basis_bits.bit_5);
    138     stream temp13 = (basis_bits.bit_6 | basis_bits.bit_7);
    139     stream temp14 = (temp12 & (~ temp13));
    140     lex.LAngle = (temp8 & temp14);
    141     stream temp15 = (temp12 & temp5);
    142     lex.RAngle = (temp8 & temp15);
    143     stream temp16 = (basis_bits.bit_1 & (~ basis_bits.bit_0));
    144     stream temp17 = (basis_bits.bit_3 & (~ basis_bits.bit_2));
    145     stream temp18 = (temp16 & temp17);
    146     lex.LBracket = (temp18 & temp11);
    147     stream temp19 = (basis_bits.bit_7 & (~ basis_bits.bit_6));
    148     stream temp20 = (temp12 & temp19);
    149     lex.RBracket = (temp18 & temp20);
    150     stream temp21 = (basis_bits.bit_4 | basis_bits.bit_5);
    151     stream temp22 = (temp19 & (~ temp21));
    152     lex.Exclam = (temp3 & temp22);
    153     stream temp23 = (temp12 & temp10);
    154     lex.QMark = (temp8 & temp23);
    155     lex.Hyphen = (temp3 & temp20);
    156     lex.Equals = (temp8 & temp20);
    157     stream temp24 = (temp4 & temp10);
    158     lex.SQuote = (temp3 & temp24);
    159     stream temp25 = (temp5 & (~ temp21));
    160     lex.DQuote = (temp3 & temp25);
    161     lex.Slash = (temp3 & temp23);
    162     stream temp26 = (temp10 & (~ temp21));
    163     lex.Hash = (temp3 & temp26);
    164     stream temp27 = (temp16 & temp7);
    165     stream temp28 = (temp9 & (~ temp13));
    166     lex.x = (temp27 & temp28);
    167     stream temp29 = (temp9 & temp5);
    168     lex.Colon = (temp8 & temp29);
    169     stream temp30 = (temp18 & temp23);
    170     stream temp31 = (temp30 | lex.Colon);
    171     stream temp32 = (temp16 & (~ basis_bits.bit_2));
    172     stream temp33 = (basis_bits.bit_5 | temp10);
    173     stream temp34 = (basis_bits.bit_4 & temp33);
    174     stream temp35 = (~ temp34);
    175     stream temp36 = (temp21 | temp13);
    176     stream temp37 = ((basis_bits.bit_3 & temp35) | ((~ basis_bits.bit_3) & temp36));
    177     stream temp38 = (temp32 & temp37);
    178     stream temp39 = (temp31 | temp38);
    179     stream temp40 = (temp16 & basis_bits.bit_2);
    180     stream temp41 = (temp40 & temp37);
    181     lex.ASCII_name_start = (temp39 | temp41);
    182     stream temp42 = (temp30 | lex.Hyphen);
    183     stream temp43 = (temp3 & temp15);
    184     stream temp44 = (temp42 | temp43);
    185     stream temp45 = (temp8 & (~ temp34));
    186     stream temp46 = (temp44 | temp45);
    187     stream temp47 = (temp46 | temp38);
    188     lex.ASCII_name_char = (temp47 | temp41);
    189     lex.NameScan = (lex.ASCII_name_char | basis_bits.bit_0);
    190     stream temp48 = (temp1 | basis_bits.bit_2);
    191     stream x00_x1F = (~ temp48);
    192     stream temp49 = (basis_bits.bit_2 | basis_bits.bit_3);
    193     stream temp50 = (temp1 | temp49);
    194     lex.CR = (temp20 & (~ temp50));
    195     lex.LF = (temp29 & (~ temp50));
    196     stream temp51 = (temp9 & temp19);
    197     lex.HT = (temp51 & (~ temp50));
    198     lex.SP = (temp3 & (~ temp36));
    199     stream temp52 = (temp20 | temp29);
    200     stream temp53 = (temp52 | temp51);
    201     stream temp54 = (temp53 & (~ temp50));
    202     lex.WS = (temp54 | lex.SP);
    203     stream temp55 = (basis_bits.bit_5 | basis_bits.bit_6);
    204     stream temp56 = (basis_bits.bit_4 & temp55);
    205     lex.Digit = (temp8 & (~ temp56));
    206     stream temp57 = (temp16 & (~ temp49));
    207     stream temp58 = (temp57 & (~ basis_bits.bit_4));
    208     stream temp59 = (~ temp10);
    209     stream temp60 = ((basis_bits.bit_5 & temp59) | ((~ basis_bits.bit_5) & temp13));
    210     stream temp61 = (temp58 & temp60);
    211     stream temp62 = (lex.Digit | temp61);
    212     stream temp63 = (temp16 & temp2);
    213     stream temp64 = (temp63 & (~ basis_bits.bit_4));
    214     stream temp65 = (temp64 & temp60);
    215     lex.Hex = (temp62 | temp65);
    216     stream lex_error = (x00_x1F & (~ lex.WS));
    217     pablo.assert_0(pablo.inFile(lex_error), "Error: illegal character");
    218     u8.unibyte = (~ basis_bits.bit_0);
    219     u8.suffix = pablo.Mask(1,0);
    220     stream u8_error = pablo.Mask(1,0);
    221     stream u8_FFFE_FFFF = pablo.Mask(1,0);
    222     stream u8anyscope = pablo.Mask(1,0);
    223     if (basis_bits.bit_0) {
    224         u8.prefix = (basis_bits.bit_0 & basis_bits.bit_1);
    225         u8.prefix2 = (u8.prefix & (~ basis_bits.bit_2));
    226         u8.prefix3 = (u8.prefix & temp2);
    227         u8.prefix4 = (u8.prefix & temp7);
    228         u8.suffix = (basis_bits.bit_0 & (~ basis_bits.bit_1));
    229         stream temp66 = (u8.prefix & (~ temp49));
    230         stream temp67 = (temp21 | basis_bits.bit_6);
    231         stream temp68 = (temp66 & (~ temp67));
    232         stream temp69 = (basis_bits.bit_5 & temp13);
    233         stream temp70 = (basis_bits.bit_4 | temp69);
    234         stream temp71 = (u8.prefix4 & temp70);
    235         u8.badprefix = (temp68 | temp71);
    236         u8_error = u8.badprefix;
    237         u8.scope22 = pablo.Advance(u8.prefix2);
    238         u8anyscope = u8.scope22;
    239         if ((u8.prefix3 | u8.prefix4)) {
    240             stream xE0 = (u8.prefix3 & (~ temp36));
    241             stream xED = (u8.prefix3 & temp20);
    242             stream xF0 = (u8.prefix4 & (~ temp36));
    243             stream temp72 = (temp4 & (~ temp13));
    244             stream xF4 = (u8.prefix4 & temp72);
    245             u8.xA0_xBF = (u8.suffix & basis_bits.bit_2);
    246             u8.x80_x9F = (u8.suffix & (~ basis_bits.bit_2));
    247             u8.x90_xBF = (u8.suffix & temp49);
    248             u8.x80_x8F = (u8.suffix & (~ temp49));
    249             stream xEF = (u8.prefix3 & temp23);
    250             stream temp73 = (u8.suffix & temp7);
    251             u8.xBF = (temp73 & temp23);
    252             u8.xBE = (temp73 & temp15);
    253             u8.scope32 = pablo.Advance(u8.prefix3);
    254             u8.scope33 = pablo.Advance(u8.scope32);
    255             u8.scope42 = pablo.Advance(u8.prefix4);
    256             u8.scope43 = pablo.Advance(u8.scope42);
    257             u8.scope44 = pablo.Advance(u8.scope43);
    258             stream E0_F0_scope = pablo.Advance((xE0 | xF0));
    259             stream ED_F4_scope = pablo.Advance((xED | xF4));
    260             u8.xE0_scope = (u8.scope32 & E0_F0_scope);
    261             u8.xED_scope = (u8.scope32 & ED_F4_scope);
    262             u8.xF0_scope = (u8.scope42 & E0_F0_scope);
    263             u8.xF4_scope = (u8.scope42 & ED_F4_scope);
    264             u8.xEF_scope = pablo.Advance(xEF);
    265             stream u8lastscope = ((u8.scope22 | u8.scope33) | u8.scope44);
    266             u8anyscope = (((u8lastscope | u8.scope32) | u8.scope42) | u8.scope43);
    267             stream u8error1 = (u8.xE0_scope & u8.x80_x9F);
    268             stream u8error2 = (u8.xED_scope & u8.xA0_xBF);
    269             stream u8error3 = (u8.xF0_scope & u8.x80_x8F);
    270             stream u8error4 = (u8.xF4_scope & u8.x90_xBF);
    271             u8_error |= (((u8error1 | u8error2) | u8error3) | u8error4);
    272             stream EF_BF_pending = pablo.Advance((u8.xEF_scope & u8.xBF));
    273             u8_FFFE_FFFF = (EF_BF_pending & (u8.xBE | u8.xBF));
     71function void MatchLines1(struct Lex lex, struct Matches matches, struct Lines lines) {
     72    stream last_start = pablo.First();
     73    stream LF_or_match = (lex.LF | matches.all_matched);
     74    stream cursor = pablo.ScanToFirst(LF_or_match);
     75    while (pablo.inFile(cursor)) {
     76        if ((cursor & matches.all_matched)) {
     77            stream next_end = pablo.AdvanceThenScanTo(cursor, lex.LF);
     78            lines.all_matched |= (pablo.InclusiveSpan(last_start, next_end) | next_end);
     79            cursor = next_end;
    274 80        }
    275         stream u8mismatch = (u8anyscope ^ u8.suffix);
    276         pablo.assert_0(((u8_error | u8mismatch) | u8_FFFE_FFFF), "UTF-8 error found");
     81        if ((cursor & lex.LF)) {
     82            last_start = pablo.Advance(cursor);
     83        }
     84        cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match);
    277 85    }
    278 86}
    279 87
    280 function void Parse_CtCDPI(struct Lex lex, struct Marker marker, struct CtCDPI_Callouts ctCDPI_Callouts, struct Check_streams check_streams) {
    281     ctCDPI_Callouts.Ct_starts = pablo.Mask(1,0);
    282     ctCDPI_Callouts.Ct_ends = pablo.Mask(1,0);
    283     ctCDPI_Callouts.CD_starts = pablo.Mask(1,0);
    284     ctCDPI_Callouts.CD_ends = pablo.Mask(1,0);
    285     ctCDPI_Callouts.PI_starts = pablo.Mask(1,0);
    286     ctCDPI_Callouts.PI_name_starts = pablo.Mask(1,0);
    287     ctCDPI_Callouts.PI_name_ends = pablo.Mask(1,0);
    288     ctCDPI_Callouts.PI_ends = pablo.Mask(1,0);
    289     stream CtCDPI_starts = pablo.Mask(1,0);
    290     stream CtCDPI_ends = pablo.Mask(1,0);
    291     stream ctCDPI_mask = pablo.Mask(1,0);
    292     stream v = (lex.LAngle | lex.Hyphen);
    293     stream w = (lex.Hyphen | lex.QMark);
    294     stream v1 = pablo.AdvanceN(v,1);
    295     stream w1 = pablo.AdvanceN(w,1);
    296     stream LAngle_scope = (v1 & (~ w1));
    297     stream PI_opener = (LAngle_scope & lex.QMark);
    298     stream CtCD_opener = (LAngle_scope & lex.Exclam);
    299     stream CtCDPI_opener = (PI_opener | CtCD_opener);
    300     stream CD_closer = pablo.Mask(1,0);
    301     stream DoubleHyphen = ((v1 & w1) & lex.Hyphen);
    302     if (lex.RBracket) {
    303         stream DoubleRBracket = (pablo.Advance(lex.RBracket) & lex.RBracket);
    304         CD_closer = (pablo.Advance(DoubleRBracket) & lex.RAngle);
    305     }
    306     stream PI_closer = ((w1 & (~ v1)) & lex.RAngle);
    307     stream CtCDPI_Cursor = pablo.ScanToFirst(CtCDPI_opener);
    308     while (CtCDPI_Cursor) {
    309         CtCDPI_starts |= CtCDPI_Cursor;
    310         stream PI_Cursor = (CtCDPI_Cursor & PI_opener);
    311         stream CD_Ct_Cursor = pablo.Advance((CtCDPI_Cursor & (~ PI_Cursor)));
    312         stream CD_Cursor = (CD_Ct_Cursor & lex.LBracket);
    313         stream Ct_Cursor = (CD_Ct_Cursor & lex.Hyphen);
    314         if (PI_Cursor) {
    315             ctCDPI_Callouts.PI_starts |= PI_Cursor;
    316             PI_Cursor = pablo.Advance(PI_Cursor);
    317             ctCDPI_Callouts.PI_name_starts |= PI_Cursor;
    318             stream PI_name_end = pablo.ScanThru(PI_Cursor, lex.NameScan);
    319             stream PI_error = (PI_Cursor & PI_name_end);
    320             stream PI_noWS = (PI_name_end & (~ lex.WS));
    321             PI_error |= ((PI_noWS & (~ lex.QMark)) | (pablo.Advance(PI_noWS) & (~ PI_closer)));
    322             pablo.assert_0(PI_error, "Error in PI syntax");
    323             ctCDPI_Callouts.PI_name_ends |= PI_name_end;
    324             PI_Cursor = pablo.ScanTo(PI_name_end, PI_closer);
    325             ctCDPI_Callouts.PI_ends |= PI_Cursor;
    326             CtCDPI_ends |= PI_Cursor;
     88function void MatchLines2(struct Lex lex, struct Matches matches, struct Lines lines) {
     89    stream last_start = pablo.Mask(1,0);
     90    last_start = (pablo.Advance((~ last_start)) ^ (~ last_start));
     91    stream LF_or_match = (lex.LF | matches.all_matched);
     92    stream cursor = pablo.ScanToFirst(LF_or_match);
     93    while (pablo.inFile(cursor)) {
     94        if ((cursor & matches.all_matched)) {
     95            stream next_end = pablo.AdvanceThenScanTo(cursor, lex.LF);
     96            lines.all_matched |= (pablo.InclusiveSpan(last_start, next_end) | next_end);
     97            cursor = next_end;
    327 98        }
    328         if (CD_Cursor) {
    329             ctCDPI_Callouts.CD_starts |= CD_Cursor;
    330             CD_Cursor = pablo.ScanTo(CD_Cursor, CD_closer);
    331             ctCDPI_Callouts.CD_ends |= CD_Cursor;
    332             CtCDPI_ends |= CD_Cursor;
     99        if ((cursor & lex.LF)) {
     100            last_start = pablo.Advance(cursor);
    333 101        }
    334         if (Ct_Cursor) {
    335             ctCDPI_Callouts.Ct_starts |= Ct_Cursor;
    336             Ct_Cursor = pablo.Advance(Ct_Cursor);
    337             stream Ct_error = (Ct_Cursor & (~ lex.Hyphen));
    338             Ct_Cursor = pablo.Advance(pablo.Advance(Ct_Cursor));
    339             Ct_Cursor = pablo.Advance(pablo.ScanTo(Ct_Cursor, DoubleHyphen));
    340             pablo.assert_0((Ct_error | (Ct_Cursor & (~ lex.RAngle))), "Error in comment syntax");
    341             ctCDPI_Callouts.Ct_ends |= Ct_Cursor;
    342             CtCDPI_ends |= Ct_Cursor;
    343         }
    344         CtCDPI_Cursor = ((PI_Cursor | CD_Cursor) | Ct_Cursor);
    345         ctCDPI_mask = pablo.InclusiveSpan(CtCDPI_starts, CtCDPI_ends);
    346         pablo.assert_0(pablo.atEOF(ctCDPI_mask), "Error in comment, CDATA or processing instruction syntax");
    347         CtCDPI_Cursor = pablo.ScanTo(CtCDPI_Cursor, CtCDPI_opener);
    348     }
    349     check_streams.misc_mask = ((((lex.WS | lex.LAngle) | pablo.InclusiveSpan((ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts), (ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends))) | CtCDPI_starts) & EOF_mask);
    350     marker.LAngle_scope = (LAngle_scope & (~ ctCDPI_mask));
    351     marker.Ref_opener = (lex.RefStart & (~ ctCDPI_mask));
    352     marker.CD_closer = (CD_closer & (~ ctCDPI_mask));
    353 }
    354 
    355 function void Parse_tags(struct Lex lex, struct Marker marker, struct Tag_Callouts tag_Callouts) {
    356     stream EqExpected = pablo.Mask(1,0);
    357     stream AttListEnd = pablo.Mask(1,0);
    358     stream DQuoteDelim = (lex.DQuote | lex.LAngle);
    359     stream SQuoteDelim = (lex.SQuote | lex.LAngle);
    360     stream AttListDelim = (lex.Slash | lex.RAngle);
    361     tag_Callouts.ElemName_starts = (marker.LAngle_scope & (~ lex.Slash));
    362     tag_Callouts.EndTag_marks = (marker.LAngle_scope & lex.Slash);
    363     tag_Callouts.ElemName_ends = pablo.ScanThru(tag_Callouts.ElemName_starts, lex.NameScan);
    364     stream ParseError = (tag_Callouts.ElemName_starts & tag_Callouts.ElemName_ends);
    365     tag_Callouts.AttName_starts = pablo.Mask(1,0);
    366     tag_Callouts.AttName_ends = pablo.Mask(1,0);
    367     tag_Callouts.AttVal_starts = pablo.Mask(1,0);
    368     tag_Callouts.AttVal_ends = pablo.Mask(1,0);
    369     if ((tag_Callouts.ElemName_ends & lex.WS)) {
    370         stream AfterWS = pablo.ScanThru(tag_Callouts.ElemName_ends, lex.WS);
    371         AttListEnd = (AfterWS & AttListDelim);
    372         stream AttNameStart = (AfterWS & (~ AttListDelim));
    373         while (AttNameStart) {
    374             ParseError |= (AttNameStart & (~ lex.NameScan));
    375             tag_Callouts.AttName_starts |= AttNameStart;
    376             stream AttNameFollow = pablo.ScanThru(AttNameStart, lex.NameScan);
    377             tag_Callouts.AttName_ends |= AttNameFollow;
    378             if ((AttNameFollow & lex.WS)) {
    379                 EqExpected = pablo.ScanThru(AttNameFollow, lex.WS);
    380             }
    381             else {
    382                 EqExpected = AttNameFollow;
    383             }
    384             ParseError |= (EqExpected & (~ lex.Equals));
    385             stream AttValPos = pablo.AdvanceThenScanThru(EqExpected, lex.WS);
    386             tag_Callouts.AttVal_starts |= AttValPos;
    387             stream DQuoteAttVal = (AttValPos & lex.DQuote);
    388             stream SQuoteAttVal = (AttValPos & lex.SQuote);
    389             stream DQuoteAttEnd = pablo.AdvanceThenScanTo(DQuoteAttVal, DQuoteDelim);
    390             stream SQuoteAttEnd = pablo.AdvanceThenScanTo(SQuoteAttVal, SQuoteDelim);
    391             stream AttValEnd = (DQuoteAttEnd | SQuoteAttEnd);
    392             ParseError |= ((AttValPos | AttValEnd) & (~ (lex.DQuote | lex.SQuote)));
    393             stream AttValFollow = pablo.Advance(AttValEnd);
    394             tag_Callouts.AttVal_ends |= AttValFollow;
    395             if ((AttValFollow & lex.WS)) {
    396                 AfterWS = pablo.ScanThru(AttValFollow, lex.WS);
    397                 AttListEnd |= (AfterWS & AttListDelim);
    398                 AttNameStart = (AfterWS & (~ AttListDelim));
    399             }
    400             else {
    401                 AttListEnd |= (AttValFollow & AttListDelim);
    402                 AttNameStart = (AttValFollow & (~ AttListDelim));
    403             }
    404             ParseError |= (AttValFollow & AttNameStart);
    405         }
    406     }
    407     else {
    408         AttListEnd = (tag_Callouts.ElemName_ends & AttListDelim);
    409         ParseError |= (tag_Callouts.ElemName_ends & (~ AttListDelim));
    410     }
    411     stream STagEnds = (AttListEnd & lex.RAngle);
    412     tag_Callouts.EmptyTag_marks = pablo.Advance((AttListEnd & lex.Slash));
    413     ParseError |= (tag_Callouts.EmptyTag_marks & (~ lex.RAngle));
    414     stream EndTagEnds = pablo.AdvanceThenScanThru(tag_Callouts.EndTag_marks, lex.NameScan);
    415     if ((EndTagEnds & lex.WS)) {
    416         EndTagEnds = pablo.ScanThru(EndTagEnds, lex.WS);
    417     }
    418     ParseError |= (EndTagEnds & (~ lex.RAngle));
    419     pablo.assert_0(ParseError, "Tag parsing error found");
    420     tag_Callouts.AttVal_spans = pablo.SpanUpTo(tag_Callouts.AttVal_starts, tag_Callouts.AttVal_ends);
    421 }
    422 
    423 function void Parse_refs(struct Lex lex, struct Marker marker, struct Ref_Callouts ref_Callouts) {
    424     ref_Callouts.GenRef_starts = pablo.Mask(1,0);
    425     ref_Callouts.GenRef_ends = pablo.Mask(1,0);
    426     ref_Callouts.DecRef_starts = pablo.Mask(1,0);
    427     ref_Callouts.DecRef_ends = pablo.Mask(1,0);
    428     ref_Callouts.HexRef_starts = pablo.Mask(1,0);
    429     ref_Callouts.HexRef_ends = pablo.Mask(1,0);
    430     stream ref_error = pablo.Mask(1,0);
    431     if (marker.Ref_opener) {
    432         stream Ref_scope = pablo.Advance(marker.Ref_opener);
    433         stream NumRef2 = (Ref_scope & lex.Hash);
    434         ref_Callouts.GenRef_starts = (Ref_scope & (~ lex.Hash));
    435         stream NumRef3 = pablo.Advance(NumRef2);
    436         stream HexRef3 = (NumRef3 & lex.x);
    437         ref_Callouts.DecRef_starts = (NumRef3 & (~ lex.x));
    438         ref_Callouts.HexRef_starts = pablo.Advance(HexRef3);
    439         ref_Callouts.GenRef_ends = pablo.ScanThru(ref_Callouts.GenRef_starts, lex.NameScan);
    440         ref_Callouts.DecRef_ends = pablo.ScanThru(ref_Callouts.DecRef_starts, lex.Digit);
    441         ref_Callouts.HexRef_ends = pablo.ScanThru(ref_Callouts.HexRef_starts, lex.Hex);
    442         stream ref_error1 = (ref_Callouts.DecRef_starts & (~ lex.Digit));
    443         stream ref_error2 = (ref_Callouts.HexRef_starts & (~ lex.Hex));
    444         stream ref_ends = ((ref_Callouts.GenRef_ends | ref_Callouts.DecRef_ends) | ref_Callouts.HexRef_ends);
    445         stream ref_error3 = (ref_ends & (~ lex.Semicolon));
    446         pablo.assert_0(((ref_error1 | ref_error2) | ref_error3), "Reference error found");
     102        cursor = pablo.AdvanceThenScanTo(cursor, LF_or_match);
    447 103    }
    448 104}
    449 105
    450 function void Validate_xml_names(struct CtCDPI_Callouts ctCDPI_Callouts, struct Ref_Callouts ref_Callouts, struct Tag_Callouts tag_Callouts, struct Lex lex, struct U8 u8, struct Check_streams check_streams) {
    451     stream PI_names = pablo.SpanUpTo(ctCDPI_Callouts.PI_name_starts, ctCDPI_Callouts.PI_name_ends);
    452     stream GenRefs = pablo.SpanUpTo(ref_Callouts.GenRef_starts, ref_Callouts.GenRef_ends);
    453     stream ElemNames = pablo.SpanUpTo(tag_Callouts.ElemName_starts, tag_Callouts.ElemName_ends);
    454     stream AttNames = pablo.SpanUpTo(tag_Callouts.AttName_starts, tag_Callouts.AttName_ends);
    455     stream qname_stream = (ElemNames | AttNames);
    456     stream ncname_stream = (PI_names | GenRefs);
    457     stream name_stream = (qname_stream | ncname_stream);
    458     stream name_start = (name_stream & (~ pablo.Advance(name_stream)));
    459     stream name_cursor = (name_stream & (~ pablo.Advance(name_stream)));
    460     stream void_prefix_err = (name_cursor & lex.Colon);
    461     stream namespace_sep = (pablo.ScanThru(name_cursor, (lex.NameScan & (~ lex.Colon))) & lex.Colon);
    462     stream local_part_start = pablo.Advance(namespace_sep);
    463     stream local_part_err = (local_part_start & (~ lex.NameScan));
    464     stream colon2_err = (pablo.ScanThru(local_part_start, (lex.NameScan & (~ lex.Colon))) & lex.Colon);
    465     stream ncname_err = (ncname_stream & lex.Colon);
    466     pablo.assert_0((((void_prefix_err | local_part_err) | colon2_err) | ncname_err), "name syntax error");
    467     check_streams.non_ascii_name_starts = (name_start & (~ lex.ASCII_name_start));
    468     check_streams.non_ascii_names = (((name_stream & (~ name_start)) & (~ lex.ASCII_name_char)) & (~ u8.suffix));
     106function void FilterMatchLines(struct Data data, struct Output output) {
     107    output.lines = pablo.filter_bytes(data, (~ lines.all_matched));
    469 108}
    470 109
    471 function void Do_check_streams(struct Marker marker, struct Tag_Callouts tag_Callouts, struct Check_streams check_streams) {
    472     pablo.assert_0((marker.CD_closer & (~ tag_Callouts.AttVal_spans)), "Error: ]]> in text");
    473     check_streams.tag_marks = (((tag_Callouts.EmptyTag_marks | tag_Callouts.ElemName_starts) | tag_Callouts.EndTag_marks) | tag_Callouts.AttName_starts);
    474     check_streams.name_follows = (tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends);
    475     check_streams.att_refs = (tag_Callouts.AttVal_spans & marker.Ref_opener);
     110function void CountLines(struct Lex lex, struct Lines lines) {
     111    return pablo.PopCount((pablo.MatchStar(lines, (~ lex.LF)) & lex.LF)) ;
    476 112}
Note: See TracChangeset for help on using the changeset viewer.