Changeset 911 for proto


Ignore:
Timestamp:
Feb 13, 2011, 12:25:35 PM (9 years ago)
Author:
lindanl
Message:

Rename post_process to check_streams in parabix2_pablo.py. Add the postprocess_do_block function. Rename tag_matcher to TagMatcher and make the matcher global.

Location:
proto/parabix2
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/pablo_template.cpp

    r910 r911  
    2424#include "../lib/s2p.h"
    2525
    26 #include "tag_matcher.h"
     26#include "TagMatcher.h"
    2727#include "LineColTracker.h"
    2828
     
    4646char * source;
    4747LineColTracker tracker;
     48TagMatcher matcher;
    4849
    4950
     
    207208}
    208209
    209 
     210static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail){
     211            tracker.StoreNewlines(lex.LF);
     212
     213                if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     214                  StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
     215                  StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
     216                }
     217
     218                if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
     219                  StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
     220                }
     221
     222                if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
     223                  StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
     224                }
     225
     226                if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
     227                  StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
     228                }
     229
     230                if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
     231                  StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
     232                }
     233
     234                if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
     235                  StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
     236                }
     237
     238                if (bitblock_has_bit(check_streams.att_refs)){
     239                  StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
     240                }
     241
     242                if (bitblock_has_bit(check_streams.error_mask)) {
     243                  int errpos = count_forward_zeroes(check_streams.error_mask);
     244                  ReportError("error found", errpos);
     245              exit(-1);
     246                }
     247
     248                matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     249                tracker.AdvanceBlock();
     250}
    210251
    211252void do_process(FILE *infile, FILE *outfile) {
     
    231272  chars_avail = min(chars_read,BUFFER_SIZE);
    232273
    233   tag_matcher t(srcbuf);
     274  matcher.setSrc(srcbuf);
    234275
    235276  if(chars_read<4){
     
    281322    if(chars_avail < BUFFER_SIZE){
    282323
    283       while (block_pos <= chars_avail || @any_carry){
    284 
    285         int bytes = chars_avail - block_pos;
    286         block_base = block_pos;
    287 
    288         if(bytes < BLOCK_SIZE){
    289           masks.EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-bytes));
    290           EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-bytes));
    291         }
    292 
    293         s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    294 
    295         basis_bits.bit_0 = simd_and(basis_bits.bit_0, masks.EOF_mask);
    296         basis_bits.bit_1 = simd_and(basis_bits.bit_1, masks.EOF_mask);
    297         basis_bits.bit_2 = simd_and(basis_bits.bit_2, masks.EOF_mask);
    298         basis_bits.bit_3 = simd_and(basis_bits.bit_3, masks.EOF_mask);
    299         basis_bits.bit_4 = simd_and(basis_bits.bit_4, masks.EOF_mask);
    300         basis_bits.bit_5 = simd_and(basis_bits.bit_5, masks.EOF_mask);
    301         basis_bits.bit_6 = simd_and(basis_bits.bit_6, masks.EOF_mask);
    302         basis_bits.bit_7 = simd_and(basis_bits.bit_7, masks.EOF_mask);
    303 
    304         @block_stmts
    305 
    306         tracker.StoreNewlines(lex.LF);
    307 
    308         if (bitblock_has_bit(simd_or(post_process.non_ascii_name_starts, post_process.non_ascii_names))) {
    309           StreamScan((ScanBlock *) &post_process.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    310           StreamScan((ScanBlock *) &post_process.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
    311         }
    312 
    313         if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    314           StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
    315         }
    316 
    317         if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    318           StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
    319         }
    320 
    321         if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    322           StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
    323         }
    324 
    325         if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    326           StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
    327         }
    328 
    329         if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    330           StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
    331         }
    332 
    333         if (bitblock_has_bit(post_process.att_refs)){
    334           StreamScan((ScanBlock *) &post_process.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    335         }
    336 
    337         if (bitblock_has_bit(post_process.error_mask)) {
    338           int errpos = count_forward_zeroes(post_process.error_mask);
    339           ReportError("error found", errpos);
    340       exit(-1);
    341         }
    342 
    343         t.store_streams(post_process.tag_marks, post_process.name_follows, post_process.misc_mask, chars_avail);
    344         tracker.AdvanceBlock();
    345         block_pos += BLOCK_SIZE;
    346       }
     324                while (block_pos <= chars_avail || @any_carry){
     325
     326                        int bytes = chars_avail - block_pos;
     327                        block_base = block_pos;
     328
     329                        if(bytes < BLOCK_SIZE){
     330                          masks.EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-bytes));
     331                          EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-bytes));
     332                        }
     333
     334                        s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     335
     336                        basis_bits.bit_0 = simd_and(basis_bits.bit_0, masks.EOF_mask);
     337                        basis_bits.bit_1 = simd_and(basis_bits.bit_1, masks.EOF_mask);
     338                        basis_bits.bit_2 = simd_and(basis_bits.bit_2, masks.EOF_mask);
     339                        basis_bits.bit_3 = simd_and(basis_bits.bit_3, masks.EOF_mask);
     340                        basis_bits.bit_4 = simd_and(basis_bits.bit_4, masks.EOF_mask);
     341                        basis_bits.bit_5 = simd_and(basis_bits.bit_5, masks.EOF_mask);
     342                        basis_bits.bit_6 = simd_and(basis_bits.bit_6, masks.EOF_mask);
     343                        basis_bits.bit_7 = simd_and(basis_bits.bit_7, masks.EOF_mask);
     344
     345                        @block_stmts
     346
     347                        postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
     348
     349                        block_pos += BLOCK_SIZE;
     350                }
    347351    }
    348352    else{
    349353      while (block_pos < chars_avail){
    350354
    351         block_base = block_pos;
    352 
    353         s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    354 
    355 
    356         @block_stmts
    357 
    358         tracker.StoreNewlines(lex.LF);
    359 
    360         if (bitblock_has_bit(simd_or(post_process.non_ascii_name_starts, post_process.non_ascii_names))) {
    361           StreamScan((ScanBlock *) &post_process.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    362           StreamScan((ScanBlock *) &post_process.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
    363         }
    364 
    365         if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    366           StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
    367         }
    368 
    369         if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    370           StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
    371         }
    372 
    373         if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    374           StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
    375         }
    376 
    377         if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    378           StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
    379         }
    380 
    381         if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    382           StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
    383         }
    384 
    385         if (bitblock_has_bit(post_process.att_refs)){
    386           StreamScan((ScanBlock *) &post_process.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    387         }
    388 
    389         if (bitblock_has_bit(post_process.error_mask)) {
    390           int errpos = count_forward_zeroes(post_process.error_mask);
    391           ReportError("error found", errpos);
    392       exit(-1);
    393         }
    394 
    395         t.store_streams(post_process.tag_marks, post_process.name_follows, post_process.misc_mask, chars_avail);
    396         tracker.AdvanceBlock();
    397         block_pos += BLOCK_SIZE;
     355          block_base = block_pos;
     356
     357          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     358
     359
     360                  @block_stmts
     361
     362                  postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
     363
     364                  block_pos += BLOCK_SIZE;
    398365      }
    399366    }
    400367
    401368
    402     t.StreamScan(chars_avail);
    403     t.Advance_buffer();
     369    matcher.StreamScan(chars_avail);
     370    matcher.Advance_buffer();
    404371    tracker.Advance_buffer();
    405372
     
    418385      break;
    419386  }
    420   if(t.depth!=0){
    421     fprintf(stderr, "tag matching error (depth %i) at position %i\n", t.depth, buffer_base);
     387  if(matcher.depth!=0){
     388    fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
    422389    exit(-1);
    423390  }
  • proto/parabix2/parabix2_pablo.py

    r904 r911  
    134134        EOF_mask = 0   
    135135
    136 class Post_process():
     136class Check_streams():
    137137        misc_mask = 0
    138138        non_ascii_name_starts = 0
     
    316316        scope1.QMark = w1 &~ v1
    317317
    318 def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, masks, post_process):
     318def Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, masks, check_streams):
    319319        ctCDPI_Callouts.CD_end = 0
    320320        ctCDPI_Callouts.Ct_starts = 0
     
    387387                ctCDPI_Callouts.error |= ctCDPI_Callouts.CtCDPI_mask &~ masks.EOF_mask
    388388               
    389         post_process.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & masks.EOF_mask
     389        check_streams.misc_mask = (lex.WS | lex.LAngle | (bitutil.Advance(ctCDPI_Callouts.Ct_ends | ctCDPI_Callouts.PI_ends) - (ctCDPI_Callouts.Ct_starts | ctCDPI_Callouts.PI_starts)) | CtCDPI_starts) & masks.EOF_mask
    390390
    391391def Parse_tags(lex, scope1, ctCDPI_Callouts, tag_Callouts, masks):
     
    504504                ref_Callouts.error = ref_error1 | ref_error2 | ref_error3
    505505
    506 def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, post_process):
     506def Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams):
    507507        PI_names = ctCDPI_Callouts.PI_name_ends - ctCDPI_Callouts.PI_name_starts
    508508        GenRefs = ref_Callouts.GenRef_ends - ref_Callouts.GenRef_starts
     
    522522        xml_names.namespace_error = void_prefix_err | local_part_err | colon2_err | ncname_err
    523523                       
    524         post_process.non_ascii_name_starts = name_start &~lex.ASCII_name_start
    525         post_process.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
    526 
    527 #def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, post_process, tag_Callouts, ref_Callouts, xml_names):
    528 def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, post_process):   
     524        check_streams.non_ascii_name_starts = name_start &~lex.ASCII_name_start
     525        check_streams.non_ascii_names = (name_stream &~ name_start) & ~lex.ASCII_name_char & ~u8.suffix
     526
     527#def main(basis_bits, lex, u8, scope1, ctCDPI_Callouts, masks, check_streams, tag_Callouts, ref_Callouts, xml_names):
     528def Main(basis_bits, lex, u8, xml_char, scope1, ctCDPI_Callouts, ref_Callouts, tag_Callouts, masks, xml_names, check_streams): 
    529529       
    530530        # Classify bytes for UTF-8 processing, whitespace and control
     
    540540   
    541541        # Parse all comments, CDATA sections and processing instructions.
    542         Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, masks, post_process)
     542        Parse_CtCDPI(ctCDPI_Callouts, lex, scope1, masks, check_streams)
    543543               
    544544        # All remaining '<' must be tag start characters; parse tags.
     
    552552       
    553553        # Validate XML namespaces and generate bit streams to post validate non-ascii range XML names
    554         Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, post_process)
     554        Validate_xml_names(ctCDPI_Callouts, ref_Callouts, tag_Callouts, lex, u8, xml_names, check_streams)
    555555               
    556556        # Consolidate and check for errors
    557         post_process.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
    558 
    559         post_process.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
    560         post_process.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
    561         post_process.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart             
     557        check_streams.error_mask = lex.error & EOF_mask | u8.error | u8.FFFE_FFFF | ctCDPI_Callouts.error | tag_Callouts.error | CD_end_error | ref_Callouts.error | xml_names.namespace_error
     558
     559        check_streams.tag_marks = tag_Callouts.EmptyTag_marks | tag_Callouts.LAngleFollow | tag_Callouts.AttName_starts
     560        check_streams.name_follows = tag_Callouts.ElemName_ends | tag_Callouts.AttName_ends
     561        check_streams.att_refs = tag_Callouts.AttVal_spans & scope1.RefStart           
Note: See TracChangeset for help on using the changeset viewer.