Ignore:
Timestamp:
Nov 7, 2011, 3:29:10 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: integrated identity grouping template with the new BitstreamIterator

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/symtab_pbgs_identity_template.cpp

    r1648 r1667  
    11#include "../symtab_global.h"
    22#include <pbgs_identity_symbol_table.h>
    3 
     3//#define STREAMSCAN
    44#ifdef BUFFER_PROFILING
    55        BOM_Table * parser_timer;
     
    3232PBGSIdentitySymbolTable pbgs_symbol_table;
    3333
    34 /* StreamScan & Post Process Declarations */
    35 //      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
    36 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
    37 
    3834static inline int NameStrt_check(int pos);
    3935static inline int Name_check(int pos);
     
    4440static inline int DecRef_check(int pos);
    4541static inline int AttRef_check(int pos);
     42
     43
    4644
    4745@global
     
    133131        return previous_block_last_elem_start - 1;
    134132    }
    135 }
    136 
    137 /* StreamScan & Post Process Definitions */
    138 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
    139 
    140         int blk;
    141         int block_pos = 0;
    142         int pos;
    143 
    144         for (blk = 0; blk < blk_count; blk++) {
    145                 ScanBlock s = stream[blk];
    146                 while(s) {
    147                         pos = (cfzl(s) + block_pos);
    148                         int code = (ProcessPos(pos));
    149                         if (code) {
    150                                 *error_pos_in_block = pos;
    151                                 return code; // error code
    152                         }
    153                         s = s & (s-1);  // clear rightmost bit.
    154                 }
    155                 block_pos += 8 * sizeof(ScanBlock);
    156         }
    157         return 0;
    158133}
    159134
     
    224199}
    225200
     201#ifdef STREAMSCAN
    226202static inline int NameStrt_check(int pos) {
    227203        int block_pos = block_base + pos;
     
    338314}
    339315
     316#else
     317
     318
     319
     // Non-STREAMSCAN variant: validate the name-start character at source[pos].
     // Returns XMLTestSuiteError::NAME_START when XML_10_UTF8_NameStrt_bytes
     // reports 0 for the bytes at that position, otherwise 0 (no error).
     // Unlike the STREAMSCAN variant, pos is used directly as the index into
     // source; the caller is expected to have added any block base already.
     320static inline int NameStrt_check(int pos) {
     321    if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
     322        return XMLTestSuiteError::NAME_START;
     323    }
     324    return 0;
     325}
     326
     // Non-STREAMSCAN variant: validate the name character at source[pos].
     // Returns XMLTestSuiteError::NAME when XML_10_UTF8_NameChar_bytes reports 0
     // for the bytes at that position, otherwise 0 (no error).
     327static inline int Name_check(int pos) {
     328    if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
     329        return XMLTestSuiteError::NAME;
     330    }
     331    return 0;
     332}
     333
     // Reject a processing-instruction target that matches at_XxMmLll<ASCII>
     // and is followed by ? or a byte <= space, unless it is the XML declaration.
     //   pos      - index into source of the PI name start
     //   file_pos - position of the same name relative to the file
     // Returns XMLTestSuiteError::XMLPINAME for a reserved name that is not the
     // XML declaration, otherwise 0.
     // NOTE(review): if source is a plain (possibly signed) char buffer, the
     // comparison source[pos+3] <= ' ' is also true for bytes >= 0x80 - confirm
     // the element type of source.
     334static inline int PIName_check(int pos, int file_pos) {
     335        if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
     336              // "<?xml" legal at start of file.
     // file_pos == 2 presumably means the name directly follows a two-byte
     // opener at the very start of the file - TODO confirm.
     337              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     338                  return XMLTestSuiteError::XMLPINAME;
     339              }
     340        }
     341        return 0;
     342}
     343
     // Check the CDATA section opener at source[pos].
     // Returns XMLTestSuiteError::CDATA unless at_CDATA1<ASCII> matches at that
     // position, otherwise 0 (no error).
     344static inline int CD_check(int pos) {
     345        if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
     346                  return XMLTestSuiteError::CDATA;
     347        }
     348        return 0;
     349}
     350
     // Check a general entity reference name starting at source[pos].
     // Only the references recognised by at_Ref_gt, at_Ref_lt, at_Ref_amp,
     // at_Ref_quot and at_Ref_apos are accepted; anything else yields
     // XMLTestSuiteError::UNDEFREF. Returns 0 when one of them matches.
     351static inline int GenRef_check(int pos) {
     352        unsigned char* s = (unsigned char*)&source[pos];
     353        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
     354              return XMLTestSuiteError::UNDEFREF;
     355        }
     356        return 0;
     357}
     358
     // Validate the value of a hexadecimal character reference whose digits
     // start at source[pos].
     // Returns:
     //   XMLTestSuiteError::CHARREF      - value exceeds 0x10FFFF, is 0, lies in
     //                                     0xD800..0xDFFF, or is 0xFFFE/0xFFFF
     //   XMLTestSuiteError::XML10CHARREF - value is below 0x20 and is not one of
     //                                     0x9, 0xA, 0xD
     //   0                               - otherwise
     // A reference with no hex digits leaves the value at 0 and so is reported
     // as CHARREF.
     359static inline int HexRef_check(int pos) {
     360        unsigned char* s = (unsigned char*)&source[pos];
     361        int ch_val = 0;
     362        while(at_HexDigit<ASCII>(s)){
     363          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
     // Checking the bound on every digit also keeps ch_val from growing
     // without limit on long digit runs.
     364          if (ch_val> 0x10FFFF ){
     365                return XMLTestSuiteError::CHARREF;
     366          }
     367          s++;
     368        }
     // (v | 0x7FF) == 0xDFFF holds exactly for 0xD800..0xDFFF;
     // (v | 0x1) == 0xFFFF holds exactly for 0xFFFE and 0xFFFF.
     369        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
     370          return XMLTestSuiteError::CHARREF;
     371        }
     372        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
     373          return XMLTestSuiteError::XML10CHARREF;
     374        }
     375        return 0;
     376}
     377
     // Validate the value of a decimal character reference whose digits start
     // at source[pos]. The value is accumulated in base 10.
     // Returns:
     //   XMLTestSuiteError::CHARREF      - value exceeds 0x10FFFF, is 0, lies in
     //                                     0xD800..0xDFFF, or is 0xFFFE/0xFFFF
     //   XMLTestSuiteError::XML10CHARREF - value is below 0x20 and is not one of
     //                                     0x9, 0xA, 0xD
     //   0                               - otherwise
     378static inline int DecRef_check(int pos) {
     379        unsigned char* s = (unsigned char*)&source[pos];
     380        int ch_val = 0;
     // NOTE(review): the loop is guarded by at_HexDigit although the value is
     // accumulated with DigitVal in base 10. If a-f/A-F can follow the digits
     // here, they would be folded into the value - confirm that earlier
     // parsing guarantees only decimal digits precede the terminator, or
     // switch to a decimal-digit predicate.
     381        while(at_HexDigit<ASCII>(s)){
     382          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
     383          if (ch_val> 0x10FFFF ){
     384                        return XMLTestSuiteError::CHARREF;
     385          }
     386          s++;
     387        }
     // (v | 0x7FF) == 0xDFFF holds exactly for 0xD800..0xDFFF;
     // (v | 0x1) == 0xFFFF holds exactly for 0xFFFE and 0xFFFF.
     388        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
     389                  return XMLTestSuiteError::CHARREF;
     390        }
     391        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
     392                  return XMLTestSuiteError::XML10CHARREF;
     393        }
     394        return 0;
     395}
     396
     // Check a reference inside an attribute value, starting at source[pos]
     // (the byte after the ampersand, judging by the leading # test).
     // Returns XMLTestSuiteError::ATTREF when the reference is a numeric
     // character reference whose value is 60, or when at_Ref_lt matches;
     // otherwise returns 0.
     397static inline int AttRef_check(int pos) {
     398        unsigned char* s = (unsigned char*)&source[pos];
     399        int ch_val = 0;
     400        if(s[0]=='#'){
     401          s++;
     // Both x and X are accepted as the hexadecimal marker here.
     402          if(s[0]=='x' || s[0]=='X'){
     403            s++;
     // NOTE(review): unlike HexRef_check/DecRef_check there is no upper bound
     // check inside these loops, so a long digit run can overflow the signed
     // accumulator - confirm the value has already been range-checked.
     404            while(at_HexDigit<ASCII>(s)){
     405              ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
     406              s++;
     407            }
     408          }
     409          else{
     // NOTE(review): decimal branch, but the loop is guarded by at_HexDigit;
     // same concern as in DecRef_check.
     410            while(at_HexDigit<ASCII>(s)){
     411              ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
     412              s++;
     413            }
     414          }
     // 60 is 0x3C, the code of the less-than character.
     415          if (ch_val==60){
     416            return XMLTestSuiteError::ATTREF;
     417          }
     418        }
     419        else if(at_Ref_lt<ASCII>(s)){
     420          return XMLTestSuiteError::ATTREF;
     421        }
     422        return 0;
     423}
     424#endif
     425
     426/* StreamScan & Post Process Definitions */
     // Visit every set bit of a bit stream and run a check at its position.
     //   stream             - array of blk_count ScanBlock words
     //   blk_count          - number of words to scan
     //   ProcessPos         - callback invoked with each bit position; a nonzero
     //                        result is treated as an error code
     //   error_pos_in_block - out parameter; receives the failing position
     // Returns the first nonzero code produced by ProcessPos, or 0 if every
     // position passed. *error_pos_in_block is written only on failure.
     427static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     428
     429        int blk;
     430        int block_pos = 0;
     431        int pos;
     432
     433        for (blk = 0; blk < blk_count; blk++) {
     434                ScanBlock s = stream[blk];
     435                while(s) {
     // cfzl presumably yields the index of the lowest set bit of s - TODO
     // confirm; block_pos offsets it by the bits of the preceding words.
     436                        pos = (cfzl(s) + block_pos);
     437                        int code = (ProcessPos(pos));
     438                        if (code) {
     439                                *error_pos_in_block = pos;
     440                                return code; // error code
     441                        }
     442                        s = s & (s-1);  // clear rightmost bit.
     443                }
     // Advance by the number of bits in one ScanBlock.
     444                block_pos += 8 * sizeof(ScanBlock);
     445        }
     446        return 0;
     447}
     448
     // Run a single-argument check at every position produced by an iterator.
     //   start      - iterator over positions; taken by reference and advanced
     //                here, so the caller sees it moved
     //   block_base - offset added to each iterator value before checking
     //   is_valid   - callback; a nonzero result is an XMLTestSuiteError code
     // On the first nonzero result the error is reported with its line and
     // column (looked up through tracker) and the process exits with -1, so
     // the function only returns when every position passed.
     449static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
     450
     // A default-constructed iterator serves as the end sentinel.
     451    BitBlockForwardIterator end;
     // NOTE(review): pos is declared but never used in this function.
     452    int pos, block_pos;
     453
     454    while(start != end) {
     455
     456        block_pos = block_base + *start;
     457        int rv = is_valid(block_pos);
     458
     459        if (rv) {
     460                int error_line, error_column;
     461                tracker.get_Line_and_Column(block_pos, error_line, error_column);
     462                ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     463                exit(-1);
     464        }
     465        start++;
     466    }
     467}
     468
     // Overload of validate_block for checks that also need a file position.
     //   start       - iterator over positions; taken by reference and advanced
     //   block_base  - offset added to each iterator value
     //   buffer_base - offset added to the resulting position to form file_pos
     //   is_valid    - callback taking (block_pos, file_pos); a nonzero result
     //                 is an XMLTestSuiteError code
     // On the first nonzero result the error is reported with its line and
     // column (looked up from block_pos through tracker) and the process exits
     // with -1.
     469static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
     470
     // A default-constructed iterator serves as the end sentinel.
     471        BitBlockForwardIterator end;
     // NOTE(review): pos is declared but never used in this function.
     472        int pos, block_pos, file_pos;
     473
     474        while(start != end) {
     475
     476                block_pos = block_base + *start;
     477                file_pos = block_pos+buffer_base;
     478
     479
     480                int rv = is_valid(block_pos, file_pos);
     481
     482                if (rv) {
     483                        int error_line, error_column;
     484                        tracker.get_Line_and_Column(block_pos, error_line, error_column);
     485                        ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     486                        exit(-1);
     487                }
     488                start++;
     489        }
     490}
     491
    340492static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
    341 
    342     tracker.StoreNewlines(lex.LF);
     493            tracker.StoreNewlines(lex.LF);
     494
     495            elem_starts = tag_Callouts.ElemName_starts;
     496            hashvalues[1] = hash_data.Hash_value;
     497
     498            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1) )
     499            {
     500                StreamScanLengthGrouping<1>((ScanBlock *) &tag_Callouts.ElemName_ends_1, sizeof(BitBlock)/sizeof(ScanBlock));
     501            }
     502
     503            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_2) )
     504            {
     505                StreamScanLengthGrouping<2>((ScanBlock *) &tag_Callouts.ElemName_ends_2, sizeof(BitBlock)/sizeof(ScanBlock));
     506            }
     507
     508            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3) )
     509            {
     510                StreamScanLengthGrouping<3>((ScanBlock *) &tag_Callouts.ElemName_ends_3, sizeof(BitBlock)/sizeof(ScanBlock));
     511            }
     512
     513            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_4) )
     514            {
     515                StreamScanLengthGrouping<4>((ScanBlock *) &tag_Callouts.ElemName_ends_4, sizeof(BitBlock)/sizeof(ScanBlock));
     516            }
     517
     518            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5) )
     519            {
     520                StreamScanLengthGrouping<5>((ScanBlock *) &tag_Callouts.ElemName_ends_5, sizeof(BitBlock)/sizeof(ScanBlock));
     521            }
     522
     523            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_6) )
     524            {
     525                StreamScanLengthGrouping<6>((ScanBlock *) &tag_Callouts.ElemName_ends_6, sizeof(BitBlock)/sizeof(ScanBlock));
     526            }
     527
     528            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_7) )
     529            {
     530                StreamScanLengthGrouping<7>((ScanBlock *) &tag_Callouts.ElemName_ends_7, sizeof(BitBlock)/sizeof(ScanBlock));
     531            }
     532
     533            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_8) )
     534            {
     535                StreamScanLengthGrouping<8>((ScanBlock *) &tag_Callouts.ElemName_ends_8, sizeof(BitBlock)/sizeof(ScanBlock));
     536            }
     537
     538            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9) )
     539            {
     540                StreamScanLengthGrouping<9>((ScanBlock *) &tag_Callouts.ElemName_ends_9, sizeof(BitBlock)/sizeof(ScanBlock));
     541            }
     542
     543            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_10) )
     544            {
     545                StreamScanLengthGrouping<10>((ScanBlock *) &tag_Callouts.ElemName_ends_10, sizeof(BitBlock)/sizeof(ScanBlock));
     546            }
     547
     548            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_11) )
     549            {
     550                StreamScanLengthGrouping<11>((ScanBlock *) &tag_Callouts.ElemName_ends_11, sizeof(BitBlock)/sizeof(ScanBlock));
     551            }
     552
     553            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_12) )
     554            {
     555                StreamScanLengthGrouping<12>((ScanBlock *) &tag_Callouts.ElemName_ends_12, sizeof(BitBlock)/sizeof(ScanBlock));
     556            }
     557
     558            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13) )
     559            {
     560                StreamScanLengthGrouping<13>((ScanBlock *) &tag_Callouts.ElemName_ends_13, sizeof(BitBlock)/sizeof(ScanBlock));
     561            }
     562
     563            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_14) )
     564            {
     565                StreamScanLengthGrouping<14>((ScanBlock *) &tag_Callouts.ElemName_ends_14, sizeof(BitBlock)/sizeof(ScanBlock));
     566            }
     567
     568            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_15) )
     569            {
     570                StreamScanLengthGrouping<15>((ScanBlock *) &tag_Callouts.ElemName_ends_15, sizeof(BitBlock)/sizeof(ScanBlock));
     571            }
     572
     573            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_16) )
     574            {
     575                StreamScanLengthGrouping<16>((ScanBlock *) &tag_Callouts.ElemName_ends_16, sizeof(BitBlock)/sizeof(ScanBlock));
     576            }
     577
     578            if ( bitblock_has_bit(tag_Callouts.ElemName_ends_17_and_longer) )
     579            {
     580                StreamScanLengthGrouping<17>((ScanBlock *) &tag_Callouts.ElemName_ends_17_and_longer, sizeof(BitBlock)/sizeof(ScanBlock));
     581            }
     582
     583            // Store the last starting position in case we hit boundary case
     584            previous_block_last_elem_start = - count_reverse_zeroes (elem_starts);
     585
     586            //copy current hash value data as previous one.
     587            memmove (&hashvalues[0], &hashvalues[1], 16);
     588
     589#ifdef STREAMSCAN
    343590    int rv, error_pos_in_block, error_line, error_column;
    344     elem_starts = tag_Callouts.ElemName_starts;
    345     hashvalues[1] = hash_data.Hash_value;
    346 
    347     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1) )
    348     {
    349         StreamScanLengthGrouping<1>((ScanBlock *) &tag_Callouts.ElemName_ends_1, sizeof(BitBlock)/sizeof(ScanBlock));
    350     }
    351 
    352     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_2) )
    353     {
    354         StreamScanLengthGrouping<2>((ScanBlock *) &tag_Callouts.ElemName_ends_2, sizeof(BitBlock)/sizeof(ScanBlock));
    355     }
    356 
    357     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3) )
    358     {
    359         StreamScanLengthGrouping<3>((ScanBlock *) &tag_Callouts.ElemName_ends_3, sizeof(BitBlock)/sizeof(ScanBlock));
    360     }
    361 
    362     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_4) )
    363     {
    364         StreamScanLengthGrouping<4>((ScanBlock *) &tag_Callouts.ElemName_ends_4, sizeof(BitBlock)/sizeof(ScanBlock));
    365     }
    366 
    367     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5) )
    368     {
    369         StreamScanLengthGrouping<5>((ScanBlock *) &tag_Callouts.ElemName_ends_5, sizeof(BitBlock)/sizeof(ScanBlock));
    370     }
    371 
    372     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_6) )
    373     {
    374         StreamScanLengthGrouping<6>((ScanBlock *) &tag_Callouts.ElemName_ends_6, sizeof(BitBlock)/sizeof(ScanBlock));
    375     }
    376 
    377     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_7) )
    378     {
    379         StreamScanLengthGrouping<7>((ScanBlock *) &tag_Callouts.ElemName_ends_7, sizeof(BitBlock)/sizeof(ScanBlock));
    380     }
    381 
    382     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_8) )
    383     {
    384         StreamScanLengthGrouping<8>((ScanBlock *) &tag_Callouts.ElemName_ends_8, sizeof(BitBlock)/sizeof(ScanBlock));
    385     }
    386 
    387     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9) )
    388     {
    389         StreamScanLengthGrouping<9>((ScanBlock *) &tag_Callouts.ElemName_ends_9, sizeof(BitBlock)/sizeof(ScanBlock));
    390     }
    391 
    392     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_10) )
    393     {
    394         StreamScanLengthGrouping<10>((ScanBlock *) &tag_Callouts.ElemName_ends_10, sizeof(BitBlock)/sizeof(ScanBlock));
    395     }
    396 
    397     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_11) )
    398     {
    399         StreamScanLengthGrouping<11>((ScanBlock *) &tag_Callouts.ElemName_ends_11, sizeof(BitBlock)/sizeof(ScanBlock));
    400     }
    401 
    402     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_12) )
    403     {
    404         StreamScanLengthGrouping<12>((ScanBlock *) &tag_Callouts.ElemName_ends_12, sizeof(BitBlock)/sizeof(ScanBlock));
    405     }
    406 
    407     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13) )
    408     {
    409         StreamScanLengthGrouping<13>((ScanBlock *) &tag_Callouts.ElemName_ends_13, sizeof(BitBlock)/sizeof(ScanBlock));
    410     }
    411 
    412     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_14) )
    413     {
    414         StreamScanLengthGrouping<14>((ScanBlock *) &tag_Callouts.ElemName_ends_14, sizeof(BitBlock)/sizeof(ScanBlock));
    415     }
    416 
    417     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_15) )
    418     {
    419         StreamScanLengthGrouping<15>((ScanBlock *) &tag_Callouts.ElemName_ends_15, sizeof(BitBlock)/sizeof(ScanBlock));
    420     }
    421 
    422     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_16) )
    423     {
    424         StreamScanLengthGrouping<16>((ScanBlock *) &tag_Callouts.ElemName_ends_16, sizeof(BitBlock)/sizeof(ScanBlock));
    425     }
    426 
    427     if ( bitblock_has_bit(tag_Callouts.ElemName_ends_17_and_longer) )
    428     {
    429         StreamScanLengthGrouping<17>((ScanBlock *) &tag_Callouts.ElemName_ends_17_and_longer, sizeof(BitBlock)/sizeof(ScanBlock));
    430     }
    431 
    432     // Store the last starting position in case we hit boundary case
    433     previous_block_last_elem_start = - count_reverse_zeroes (elem_starts);
    434 
    435     //copy current hash value data as previous one.
    436     memmove (&hashvalues[0], &hashvalues[1], 16);
    437591
    438592    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     
    506660    }
    507661
     662
     663
    508664    if(error_tracker.Has_Noted_Error()){
    509665            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     
    514670    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    515671    tracker.AdvanceBlock();
     672#else
     673            if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     674                BitBlockForwardIterator iter_NameStrt_check(&check_streams.non_ascii_name_starts);
     675                validate_block(iter_NameStrt_check, block_base, NameStrt_check);
     676                BitBlockForwardIterator iter_Name_check(&check_streams.non_ascii_names);
     677                validate_block(iter_Name_check, block_base, Name_check);
     678            }
     679            if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
     680                BitBlockForwardIterator iter_PI_name_starts(&(ctCDPI_Callouts.PI_name_starts));
     681                validate_block(iter_PI_name_starts, block_base, buffer_base, PIName_check);
     682            }
     683            if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
     684                BitBlockForwardIterator iter_CD_check(&ctCDPI_Callouts.CD_starts);
     685                validate_block(iter_CD_check, block_base, CD_check);
     686            }
     687            if(bitblock_has_bit(ref_Callouts.GenRef_starts)){
     688                BitBlockForwardIterator iter_GenRef_check(&ref_Callouts.GenRef_starts);
     689                validate_block(iter_GenRef_check, block_base, GenRef_check);
     690            }
     691            if(bitblock_has_bit(ref_Callouts.DecRef_starts)){
     692                BitBlockForwardIterator iter_DecRef_check(&ref_Callouts.DecRef_starts);
     693                validate_block(iter_DecRef_check, block_base, DecRef_check);
     694            }
     695            if(bitblock_has_bit(ref_Callouts.HexRef_starts)){
     696                BitBlockForwardIterator iter_HexRef_check(&ref_Callouts.HexRef_starts);
     697                validate_block(iter_HexRef_check, block_base, HexRef_check);
     698            }
     699            if(bitblock_has_bit(check_streams.att_refs)){
     700                BitBlockForwardIterator iter_AttRef_check(&check_streams.att_refs);
     701                validate_block(iter_AttRef_check, block_base, AttRef_check);
     702            }
     703
     704            if(error_tracker.Has_Noted_Error()){
     705                    int error_line, error_column;
     706                    tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     707                    ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     708                    exit(-1);
     709            }
     710
     711            matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     712            tracker.AdvanceBlock();
     713
     714
     715#endif
     716
    516717}
    517718
     
    535736  int buf_pos = 0;
    536737  int block_pos = 0;
    537   int errpos = 0;
    538738  int chars_avail = 0;
    539739  int check_pos = 0;
    540740  int chars_read = 0;
    541   BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
     741  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(BitBlock)];
    542742
    543743  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
     
    654854#endif
    655855}
     856
     857
Note: See TracChangeset for help on using the changeset viewer.