Changeset 2160

Show
Ignore:
Timestamp:
05/24/12 20:12:42 (13 months ago)
Author:
ksherdy
Message:

Refactored pablo template.

Location:
proto/parabix2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • proto/parabix2/pablo_template.cpp

    r2155 r2160  
    1313#include "../lib/bitblock_iterator.hpp" 
    1414#include "../lib/s2p.hpp" 
    15  
    16 #define SEGMENT_BLOCKS 12 
    17 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS) 
    18 #define OVERLAP_BUFSIZE (sizeof(BitBlock)) 
     15#include "../lib/perflib/perfsec.h" 
    1916 
    2017#include "xmldecl.h" 
    2118#include "namechars.h" 
    22 #include "../lib/perflib/perfsec.h" 
    23  
    2419#include "TagMatcher.hpp" 
    2520#include "LineColTracker.hpp" 
     
    2924 
    3025#ifdef BUFFER_PROFILING 
    31         BOM_Table * parser_timer; 
    32  
     26    BOM_Table * parser_timer; 
    3327#elif CODE_CLOCKER 
    34         #define NUM_EVENTS 1 
    35         int Events[NUM_EVENTS] = {PAPI_TOT_CYC}; 
    36         //int Events[NUM_EVENTS] = {PAPI_L2_DCM}; 
    37         //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP}; 
    38         int cal_size = 20; 
    39         CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size); 
     28    //#define NUM_EVENTS 1 
     29    //int Events[NUM_EVENTS] = {PAPI_TOT_CYC}; 
     30    //int Events[NUM_EVENTS] = {PAPI_L2_DCM}; 
     31    #define NUM_EVENTS 2 
     32    int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP}; 
     33    int cal_size = 20; 
     34    CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size); 
    4035#else 
    41         void * parser_timer; 
     36    void * parser_timer; 
    4237#endif 
    4338 
    44 int block_base=0; 
    45 int buffer_base=0; 
    46 char * source; 
    47  
    48 LineColTracker tracker; 
    49 TagMatcher matcher; 
    5039ErrorTracker error_tracker; 
    5140BitBlock EOF_mask = simd<1>::constant<1>(); 
    5241 
    53 static inline int NameStrt_check(int pos); 
    54 static inline int Name_check(int pos); 
    55 static inline int PIName_check(int pos); 
    56 static inline int CD_check(int pos); 
    57 static inline int GenRef_check(int pos); 
    58 static inline int HexRef_check(int pos); 
    59 static inline int DecRef_check(int pos); 
    60 static inline int AttRef_check(int pos); 
    61  
     42////////////////////////////////////////////////////////////////////////////////////////// 
     43// Buffer Management 
     44////////////////////////////////////////////////////////////////////////////////////////// 
     45#include "../lib/buffer.hpp" 
     46 
     47#define OVERLAP_BUFSIZE PADDING_SIZE //sizeof(BitBlock) 
     48 
     49////////////////////////////////////////////////////////////////////////////////////////// 
     50// @ global depends on 'error_tracker' and 'EOF_mask' definitions. 
     51////////////////////////////////////////////////////////////////////////////////////////// 
    6252@global 
    6353 
    64 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits); 
    65 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask); 
    66 static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail); 
    67  
    68 void do_process(FILE *infile, FILE *outfile); 
    69  
    70 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)); 
    71 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int)); 
     54////////////////////////////////////////////////////////////////////////////////////////// 
     55// Headers that depend on @ global stream struct types. 
     56////////////////////////////////////////////////////////////////////////////////////////// 
     57#include "../lib/transpose.hpp" 
     58#include "post_process.hpp" 
     59 
     60static void do_process(FILE *infile, FILE *outfile); 
    7261 
    7362int main(int argc, char * argv[]) { 
     
    10089        } 
    10190 
    102 //      PERF_SEC_BIND(1); 
     91        PERF_SEC_BIND(1); 
    10392 
    10493        PERF_SEC_INIT(parser_timer); 
     
    116105} 
    117106 
    118 /* s2p Definitions */ 
    119 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) { 
    120   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7], 
    121         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7); 
    122 } 
    123  
    124 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) { 
    125   s2p_do_block(U8, basis_bits); 
    126   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask); 
    127   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask); 
    128   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask); 
    129   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask); 
    130   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask); 
    131   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask); 
    132   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask); 
    133   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask); 
    134 } 
    135  
    136  
    137 static inline int NameStrt_check(int pos) { 
    138         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){ 
    139               return XMLTestSuiteError::NAME_START; 
    140         } 
    141         return 0; 
    142 } 
    143  
    144 static inline int Name_check(int pos) { 
    145         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){ 
    146                   return XMLTestSuiteError::NAME; 
    147         } 
    148         return 0; 
    149 } 
    150  
    151 static inline int PIName_check(int pos, int file_pos) { 
    152         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) { 
    153               // "<?xml" legal at start of file. 
    154               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) { 
    155                   return XMLTestSuiteError::XMLPINAME; 
    156               } 
    157         } 
    158         return 0; 
    159 } 
    160  
    161 static inline int CD_check(int pos) { 
    162         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){ 
    163                   return XMLTestSuiteError::CDATA; 
    164         } 
    165         return 0; 
    166 } 
    167  
    168 static inline int GenRef_check(int pos) { 
    169         unsigned char* s = (unsigned char*)&source[pos]; 
    170         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){ 
    171               return XMLTestSuiteError::UNDEFREF; 
    172         } 
    173         return 0; 
    174 } 
    175  
    176 static inline int HexRef_check(int pos) { 
    177         unsigned char* s = (unsigned char*)&source[pos]; 
    178         int ch_val = 0; 
    179         while(at_HexDigit<ASCII>(s)){ 
    180           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4); 
    181           if (ch_val> 0x10FFFF ){ 
    182                 return XMLTestSuiteError::CHARREF; 
    183           } 
    184           s++; 
    185         } 
    186         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){ 
    187           return XMLTestSuiteError::CHARREF; 
    188         } 
    189         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){ 
    190           return XMLTestSuiteError::XML10CHARREF; 
    191         } 
    192         return 0; 
    193 } 
    194  
    195 static inline int DecRef_check(int pos) { 
    196         unsigned char* s = (unsigned char*)&source[pos]; 
    197         int ch_val = 0; 
    198         while(at_HexDigit<ASCII>(s)){ 
    199           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10; 
    200           if (ch_val> 0x10FFFF ){ 
    201                         return XMLTestSuiteError::CHARREF; 
    202           } 
    203           s++; 
    204         } 
    205         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){ 
    206                   return XMLTestSuiteError::CHARREF; 
    207         } 
    208         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){ 
    209                   return XMLTestSuiteError::XML10CHARREF; 
    210         } 
    211         return 0; 
    212 } 
    213  
    214 static inline int AttRef_check(int pos) { 
    215         unsigned char* s = (unsigned char*)&source[pos]; 
    216         int ch_val = 0; 
    217         if(s[0]=='#'){ 
    218           s++; 
    219           if(s[0]=='x' || s[0]=='X'){ 
    220             s++; 
    221             while(at_HexDigit<ASCII>(s)){ 
    222               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4); 
    223               s++; 
    224             } 
    225           } 
    226           else{ 
    227             while(at_HexDigit<ASCII>(s)){ 
    228               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10; 
    229               s++; 
    230             } 
    231           } 
    232           if (ch_val==60){ 
    233             return XMLTestSuiteError::ATTREF; 
    234           } 
    235         } 
    236         else if(at_Ref_lt<ASCII>(s)){ 
    237           return XMLTestSuiteError::ATTREF; 
    238         } 
    239         return 0; 
    240 } 
    241  
    242 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) { 
    243  
    244         int pos, block_pos; 
    245         BitBlockForwardIterator end; 
    246         while(start != end) { 
    247  
    248                 block_pos = block_base + *start; 
    249                 int rv = is_valid(block_pos); 
    250  
    251                 if (rv) { 
    252                         int error_line, error_column; 
    253                         tracker.get_Line_and_Column(block_pos, error_line, error_column); 
    254                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column); 
    255                         exit(-1); 
    256                 } 
    257                 start++; 
    258         } 
    259 } 
    260  
    261 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) { 
    262  
    263         int pos, block_pos, file_pos; 
    264         BitBlockForwardIterator end; 
    265         while(start != end) { 
    266  
    267                 block_pos = block_base + *start; 
    268                 file_pos = block_pos+buffer_base; 
    269  
    270  
    271                 int rv = is_valid(block_pos, file_pos); 
    272  
    273                 if (rv) { 
    274                         int error_line, error_column; 
    275                         tracker.get_Line_and_Column(block_pos, error_line, error_column); 
    276                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column); 
    277                         exit(-1); 
    278                 } 
    279                 start++; 
    280         } 
    281 } 
    282  
    283 static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail){ 
    284 BitBlockForwardIterator iter; 
    285  
    286 tracker.StoreNewlines(lex.LF); 
    287  
    288 if (bitblock::any(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) { 
    289         iter.init(&check_streams.non_ascii_name_starts); 
    290         validate_block(iter, block_base, NameStrt_check); 
    291         iter.init(&check_streams.non_ascii_names); 
    292         validate_block(iter, block_base, Name_check); 
    293 } 
    294 if (bitblock::any(ctCDPI_Callouts.PI_name_starts)){ 
    295         iter.init(&(ctCDPI_Callouts.PI_name_starts)); 
    296         validate_block(iter, block_base, buffer_base, PIName_check); 
    297 } 
    298 if (bitblock::any(ctCDPI_Callouts.CD_starts)){ 
    299         iter.init(&ctCDPI_Callouts.CD_starts); 
    300         validate_block(iter, block_base, CD_check); 
    301 } 
    302 if(bitblock::any(ref_Callouts.GenRef_starts)){ 
    303         iter.init(&ref_Callouts.GenRef_starts); 
    304         validate_block(iter, block_base, GenRef_check); 
    305 } 
    306 if(bitblock::any(ref_Callouts.DecRef_starts)){ 
    307         iter.init(&ref_Callouts.DecRef_starts); 
    308         validate_block(iter, block_base, DecRef_check); 
    309 } 
    310 if(bitblock::any(ref_Callouts.HexRef_starts)){ 
    311         iter.init(&ref_Callouts.HexRef_starts); 
    312         validate_block(iter, block_base, HexRef_check); 
    313 } 
    314 if(bitblock::any(check_streams.att_refs)){ 
    315         iter.init(&check_streams.att_refs); 
    316         validate_block(iter, block_base, AttRef_check); 
    317 } 
    318  
    319 if(error_tracker.Has_Noted_Error()){ 
    320         int error_line, error_column; 
    321         tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column); 
    322         ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column); 
    323         exit(-1); 
    324 } 
    325  
    326 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail); 
    327 tracker.AdvanceBlock(); 
    328  
    329 } 
    330  
    331107void do_process(FILE *infile, FILE *outfile) { 
    332108 
    333 @decl 
    334  
    335   int buf_pos = 0; 
    336   int block_pos = 0; 
    337   int chars_avail = 0; 
    338   int check_pos = 0; 
    339   int chars_read = 0; 
    340   BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(BitBlock)]; 
    341  
    342   char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE; 
    343   buffer_base = buf_pos; 
    344   source = srcbuf; 
    345  
    346   chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile); 
    347   chars_avail = chars_read; 
    348   if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE; 
    349  
    350   matcher.setSrc(srcbuf); 
    351  
    352   if(chars_read<4){ 
     109    @decl 
     110 
     111    LineColTracker tracker; 
     112    TagMatcher<SEGMENT_SIZE,OVERLAP_BUFSIZE> matcher; 
     113 
     114    uint8_t * src_buf; 
     115    int block_base=0; 
     116    int buffer_base=0; 
     117    int buffer_pos = 0; 
     118    int block_pos = 0; 
     119    int chars_avail = 0; 
     120    int check_pos = 0; 
     121    int chars_read = 0; 
     122 
     123    ////////////////////////////////////////////////////////////////////////////////////////// 
     124    // Buffer Management 
     125    ////////////////////////////////////////////////////////////////////////////////////////// 
     126    BitBlock buf[(PADDING_SIZE + SEGMENT_SIZE + PADDING_SIZE)/sizeof(BitBlock)]; 
     127    src_buf = (uint8_t *)buf + PADDING_SIZE; 
     128 
     129    //ALLOC_STATIC_ALIGNED_BYTE_BUFFER(src_buf, (PADDING_SIZE + SEGMENT_SIZE + PADDING_SIZE)); 
     130 
     131    buffer_base = buffer_pos; 
     132    chars_read = fread((void *)src_buf, 1, BUFFER_SIZE, infile); 
     133    chars_avail = chars_read; 
     134    if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE; 
     135 
     136    ////////////////////////////////////////////////////////////////////////////////////////// 
     137    // XML Validation / Content Model 
     138    ////////////////////////////////////////////////////////////////////////////////////////// 
     139    if(chars_read<4){ 
    353140    fprintf(stderr,"File is too short. Not well formed.\n"); 
    354141    exit(-1); 
    355   } 
    356  
    357   Entity_Info * e = new Entity_Info; 
    358   e->AnalyzeSignature((unsigned char *)srcbuf); 
    359  
    360   if (e->code_unit_base == ASCII) { 
    361  
    362     XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf); 
     142    } 
     143 
     144    Entity_Info * e = new Entity_Info; 
     145    e->AnalyzeSignature((unsigned char *)src_buf); 
     146 
     147    if (e->code_unit_base == ASCII) { 
     148 
     149    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)src_buf); 
    363150 
    364151    decl_parser.ReadXMLInfo(*e); 
    365152 
    366153    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){ 
    367         fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n"); 
    368         exit(-1); 
    369     } 
    370   } 
    371   else { 
    372     fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n"); 
    373         exit(-1); 
    374   } 
    375  
    376   if (e->content_start != 0) { 
    377         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start); 
    378         buf_pos = e->content_start; 
    379         if (chars_avail == BUFFER_SIZE) { 
    380                 chars_read = chars_read - e->content_start + 
    381                              fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile); 
    382                 chars_avail = chars_read; 
    383                 if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE; 
    384         } 
    385         else { 
    386           chars_read -=e->content_start; 
    387           chars_avail -=e->content_start; 
    388         } 
    389   } 
    390  
    391 @stream_stmts 
    392  
    393 /* Full Buffers */ 
    394  
    395     while (chars_avail == BUFFER_SIZE) { 
     154        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n"); 
     155        exit(-1); 
     156    } 
     157    } 
     158    else { 
     159        fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n"); 
     160        exit(-1); 
     161    } 
     162 
     163    if (e->content_start != 0) { 
     164    memmove(&src_buf[0], &src_buf[e->content_start], chars_read - e->content_start); 
     165    buffer_pos = e->content_start; 
     166    if (chars_avail == SEGMENT_SIZE) { 
     167        chars_read = chars_read - e->content_start + fread(&src_buf[chars_read-e->content_start], 1, e->content_start, infile); 
     168        chars_avail = chars_read; 
     169        if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE; 
     170    } 
     171    else { 
     172      chars_read -=e->content_start; 
     173      chars_avail -=e->content_start; 
     174    } 
     175    } 
     176 
     177    @stream_stmts 
     178 
     179 
     180    ////////////////////////////////////////////////////////////////////////////////////////// 
     181    // Full Segments 
     182    ////////////////////////////////////////////////////////////////////////////////////////// 
     183    matcher.setSrc((char *)src_buf); 
     184    while (chars_avail == SEGMENT_SIZE) { 
    396185      PERF_SEC_START(parser_timer); 
    397186      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) { 
    398187          block_base = blk*BLOCK_SIZE; 
    399           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits); 
     188          s2p_do_block((BytePack *) &src_buf[block_base], basis_bits); 
     189 
    400190          @block_stmts 
    401           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail); 
     191 
     192          tracker.StoreNewlines(lex.LF); 
     193          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker); 
     194          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail); 
     195          tracker.AdvanceBlock(); 
    402196      } 
    403197      matcher.StreamScan(chars_avail); 
     
    406200 
    407201      int bytes_left = chars_read - chars_avail; 
    408       memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left); 
    409       chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left; 
     202      memmove(src_buf, &src_buf[SEGMENT_SIZE], bytes_left); 
     203      chars_read = fread(&src_buf[bytes_left], 1, BUFFER_SIZE - bytes_left, infile) + bytes_left; 
    410204      chars_avail = chars_read; 
    411       if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE; 
    412       buf_pos += chars_avail; 
    413       buffer_base = buf_pos; 
    414     } 
    415 /* Final Partial Buffer */ 
     205      if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE; 
     206      buffer_pos += chars_avail; 
     207      buffer_base = buffer_pos; 
     208    } 
     209 
     210    ////////////////////////////////////////////////////////////////////////////////////////// 
     211    // Final Partial Segment 
     212    ////////////////////////////////////////////////////////////////////////////////////////// 
    416213    PERF_SEC_START(parser_timer); 
    417214 
    418215    block_pos = 0; 
    419216    int remaining = chars_avail; 
    420 /* Full Blocks */ 
     217 
     218    /* Full Blocks */ 
    421219    while (remaining >= BLOCK_SIZE) { 
    422           block_base = block_pos; 
    423           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits); 
     220          block_base = block_pos; 
     221          s2p_do_block((BytePack *) &src_buf[block_pos], basis_bits); 
    424222          @block_stmts 
    425           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail); 
    426           block_pos += BLOCK_SIZE; 
     223          tracker.StoreNewlines(lex.LF); 
     224          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker); 
     225          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail); 
     226          tracker.AdvanceBlock(); 
     227          block_pos += BLOCK_SIZE; 
    427228          remaining -= BLOCK_SIZE; 
    428229    } 
    429230    block_base = block_pos; 
     231 
     232    /* Partial Block or Any Carry */ 
    430233    if (remaining > 0 || @any_carry) { 
    431234          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining)); 
    432           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask); 
     235          s2p_do_final_block((BytePack *) &src_buf[block_pos], basis_bits, EOF_mask); 
    433236          @final_block_stmts 
    434           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail); 
    435     } 
    436     buf_pos += chars_avail; 
    437     buffer_base = buf_pos; 
     237          tracker.StoreNewlines(lex.LF); 
     238          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker); 
     239          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail); 
     240          tracker.AdvanceBlock(); 
     241 
     242    } 
     243 
     244    buffer_pos += chars_avail; 
     245    buffer_base = buffer_pos; 
    438246 
    439247    matcher.StreamScan(chars_avail); 
    440248    matcher.Advance_buffer(); 
    441  
    442249 
    443250    PERF_SEC_END(parser_timer, chars_avail); 
  • proto/parabix2/src/TagMatcher.hpp

    r2155 r2160  
    44#include "../lib/bitblock.hpp" 
    55 
    6 #define MAX_DEPTH 100 
    76#include <algorithm> 
    87#include <iostream> 
     
    109using namespace std; 
    1110 
    12  
    1311#define MAX_DEPTH 100 
    1412#define MAX_ATTS 100 
     
    1917}; 
    2018 
     19template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
    2120class TagMatcher { 
    22   public: 
    23   BitBlock tagMarks[BUFFER_SIZE/BLOCK_SIZE]; 
    24   BitBlock miscMarks[BUFFER_SIZE/BLOCK_SIZE]; 
    25   char tags_buf[BUFFER_SIZE]; 
    26   int tags_buf_cur; 
    27   int stream_index; 
    28   char * srcbuf; 
    29   int depth; 
    30   int inTagPos; 
    31   int finalStartPos; 
    32   char* tag_stack[MAX_DEPTH]; 
    33   int tag_lgth_stack[MAX_DEPTH]; 
    34   BitBlock NameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel 
    35   int buf_base; 
    36   enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state; 
    37   enum TagMatchMode {StartOfFile, InFile} mode; 
    38   struct attribute Attr[MAX_ATTS]; 
    39   struct attribute InAtt; 
    40   int att_index; 
    41   int InFinalEndTag; 
    42  
     21 
     22public: 
    4323  TagMatcher(); 
    4424  ~TagMatcher(); 
     
    5030  int does_match(char * s1, char * s2, int lgth); 
    5131  int lookup_or_insert(char*s, int lgth); 
     32 
     33    int depth; 
     34 
     35private: 
     36  BitBlock tagMarks[BUF_SIZE/BLOCK_SIZE]; 
     37  BitBlock miscMarks[BUF_SIZE/BLOCK_SIZE]; 
     38  char tags_buf[BUF_SIZE]; 
     39  int tags_buf_cur; 
     40  int stream_index; 
     41  char * srcbuf; 
     42 
     43  int inTagPos; 
     44  int finalStartPos; 
     45  char* tag_stack[MAX_DEPTH]; 
     46  int tag_lgth_stack[MAX_DEPTH]; 
     47  BitBlock NameFollows[BUF_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel 
     48  int buf_base; 
     49  enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state; 
     50  enum TagMatchMode {StartOfFile, InFile} mode; 
     51  struct attribute Attr[MAX_ATTS]; 
     52  struct attribute InAtt; 
     53  int att_index; 
     54  int InFinalEndTag; 
     55 
    5256}; 
    5357 
    54 int TagMatcher::lookup_or_insert(char* s, int lgth){ 
    55   for(int i=0; i< att_index; i++) 
    56     if(lgth == Attr[i].lgth &&  does_match(s,Attr[i].start,lgth)) 
    57       return 1; 
    58  
    59   Attr[att_index].start = s; 
    60   Attr[att_index].lgth = lgth; 
    61   att_index++; 
    62   return 0; 
    63 } 
    64  
    65 int TagMatcher::does_match(char * s1, char * s2, int lgth){ 
     58template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     59int TagMatcher<BUF_SIZE, OVER_SIZE>::does_match(char * s1, char * s2, int lgth){ 
    6660    int matchlen = 0; 
    6761    int i=0; 
     
    6963      /* full 16 byte match */ 
    7064 
    71       if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]),  
    72                                 bitblock::load_unaligned((BitBlock*)&s2[i])))) { 
    73                 return 0; 
     65      if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]), 
     66                bitblock::load_unaligned((BitBlock*)&s2[i])))) { 
     67        return 0; 
    7468      } 
    7569      else { 
    76                 lgth -= sizeof(BitBlock); 
    77                 i +=sizeof(BitBlock); 
    78       } 
    79     } 
    80  
    81         scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]), 
     70        lgth -= sizeof(BitBlock); 
     71        i +=sizeof(BitBlock); 
     72      } 
     73    } 
     74 
     75    scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]), 
    8276                                                  bitblock::load_unaligned((BitBlock*)&s2[i]))); 
    8377 
    84         return lgth <= scan_forward_zeroes(temp); 
    85 } 
    86  
    87  
    88 int TagMatcher:: tag_match(int pos, int chars_avail) { 
     78    return lgth <= scan_forward_zeroes(temp); 
     79} 
     80 
     81 
     82template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     83int TagMatcher<BUF_SIZE, OVER_SIZE>::lookup_or_insert(char* s, int lgth){ 
     84  for(int i=0; i< this->att_index; i++) 
     85    if(lgth == this->Attr[i].lgth &&  this->does_match(s,this->Attr[i].start,lgth)) 
     86      return 1; 
     87 
     88  this->Attr[att_index].start = s; 
     89  this->Attr[att_index].lgth = lgth; 
     90  this->att_index++; 
     91  return 0; 
     92} 
     93 
     94 
     95template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     96int TagMatcher<BUF_SIZE, OVER_SIZE>:: tag_match(int pos, int chars_avail) { 
    8997        int rt_val=0; 
    9098//      end tag 
    91         if(srcbuf[pos]=='/' ){ 
     99    if(this->srcbuf[pos]=='/' ){ 
    92100          pos++; 
    93           depth--; 
    94           if (depth<0) 
     101      this->depth--; 
     102      if (this->depth<0) 
    95103            return pos; 
    96           int lgth = tag_lgth_stack[depth]; 
    97  
    98           if (does_match(tag_stack[depth],&srcbuf[pos],lgth) && ((srcbuf[pos+lgth] == '>') ||(srcbuf[pos+lgth] <= ' '))) rt_val=0; 
    99           else if (pos + lgth >= BUFFER_SIZE + OVERLAP_BUFSIZE) { 
    100             state = InEndTag; 
    101             inTagPos = BUFFER_SIZE - pos; 
     104      int lgth = this->tag_lgth_stack[depth]; 
     105 
     106      if (does_match(this->tag_stack[depth],&this->srcbuf[pos],lgth) && ((this->srcbuf[pos+lgth] == '>') ||(this->srcbuf[pos+lgth] <= ' '))) rt_val=0; 
     107      else if (pos + lgth >= BUF_SIZE + OVER_SIZE) { 
     108        this->state = InEndTag; 
     109        this-> inTagPos = BUF_SIZE - pos; 
    102110            rt_val=0; 
    103111          } 
     
    109117          } 
    110118 
    111           if (depth == 0){ 
    112             while(srcbuf[pos]!='>'){ 
     119      if (this->depth == 0){ 
     120        while(this->srcbuf[pos]!='>'){ 
    113121              pos++; 
    114122              if(pos>=chars_avail){ 
    115                 InFinalEndTag = 1; 
     123        this->InFinalEndTag = 1; 
    116124                return 0; 
    117125              } 
    118126            } 
    119             pos = bitstream_scan(miscMarks,pos+1); 
     127        pos = bitstream_scan(this->miscMarks,pos+1); 
     128            if(pos!=chars_avail){ 
     129          fprintf(stderr,"illegal content after root element at position = %i\n",this->buf_base+pos); 
     130              exit(-1); 
     131            } 
     132          } 
     133          return rt_val; 
     134        } 
     135//      empty tag 
     136    else if(this->srcbuf[pos]=='>'){ 
     137      this->depth--; 
     138      if (this->depth == 0){ 
     139        while(this->srcbuf[pos]!='>') 
     140              pos++; 
     141        pos = bitstream_scan(this->miscMarks,pos+1); 
     142 
    120143            if(pos!=chars_avail){ 
    121144              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos); 
     
    123146            } 
    124147          } 
    125           return rt_val; 
    126         } 
    127 //      empty tag 
    128         else if(srcbuf[pos]=='>'){ 
    129           depth--; 
    130           if (depth == 0){ 
    131             while(srcbuf[pos]!='>') 
    132               pos++; 
    133             pos = bitstream_scan(miscMarks,pos+1); 
    134  
    135             if(pos!=chars_avail){ 
    136               fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos); 
    137               exit(-1); 
    138             } 
    139           } 
    140148        } 
    141149//      start tag 
    142         else if(srcbuf[pos-1]=='<'){ 
    143           att_index = 0; 
    144           if(depth<MAX_DEPTH){ 
    145             int end_pos = bitstream_scan(NameFollows,pos); 
    146             tag_lgth_stack[depth] = end_pos-pos; 
    147             tag_stack[depth] = &srcbuf[pos]; 
    148             if(end_pos<BUFFER_SIZE){ 
    149               depth++; 
     150    else if(this->srcbuf[pos-1]=='<'){ 
     151      this->att_index = 0; 
     152      if(this->depth<MAX_DEPTH){ 
     153        int end_pos = bitstream_scan(this->NameFollows,pos); 
     154        this->tag_lgth_stack[this->depth] = end_pos-pos; 
     155        this->tag_stack[depth] = &this->srcbuf[pos]; 
     156        if(end_pos<BUF_SIZE){ 
     157         this->depth++; 
    150158            } 
    151159            else{ 
    152               state = InStartTag; 
    153               finalStartPos = pos; 
     160          this->state = InStartTag; 
     161          this->finalStartPos = pos; 
    154162            } 
    155163          } 
    156164          else{ 
    157             fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",buf_base+pos, depth); 
     165        fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",this->buf_base+pos, this->depth); 
    158166            exit(-1); 
    159167          } 
     
    161169//      attribute 
    162170        else{ 
    163           int end_pos = bitstream_scan(NameFollows,pos); 
    164           if(end_pos<BUFFER_SIZE){ 
    165             if(lookup_or_insert(&srcbuf[pos], end_pos-pos)){ 
    166               fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base+pos); 
     171      int end_pos = bitstream_scan(this->NameFollows,pos); 
     172      if(end_pos<BUF_SIZE){ 
     173        if(lookup_or_insert(&this->srcbuf[pos], end_pos-pos)){ 
     174          fprintf(stderr,"Attribute name is not unique at position =%i.\n",this->buf_base+pos); 
    167175              exit(-1); 
    168176            } 
    169177          } 
    170178          else{ 
    171             state = InAttName; 
    172             InAtt.start = &srcbuf[pos]; 
    173             InAtt.lgth = BUFFER_SIZE-pos; 
     179        this->state = InAttName; 
     180        this->InAtt.start = &this->srcbuf[pos]; 
     181        this->InAtt.lgth = BUF_SIZE-pos; 
    174182          } 
    175183        } 
     
    177185} 
    178186 
    179  
    180 int TagMatcher::StreamScan(int chars_avail) { 
     187template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     188int TagMatcher<BUF_SIZE, OVER_SIZE>::StreamScan(int chars_avail) { 
    181189 
    182190        int blk; 
     
    184192        int block_pos = 0; 
    185193 
    186         if(mode == StartOfFile){ 
     194    if(this->mode == StartOfFile){ 
    187195          int pos = bitstream_scan(miscMarks,0); 
    188196          if (pos==chars_avail){ 
    189             fprintf(stderr,"no element at position =%i.\n",buf_base+pos); 
     197        fprintf(stderr,"no element at position =%i.\n",this->buf_base+pos); 
    190198            exit(-1); 
    191199          } 
    192           if(srcbuf[pos-1]!='<'|| srcbuf[pos]=='!'||srcbuf[pos]=='/'){ 
     200      if(this->srcbuf[pos-1]!='<'|| this->srcbuf[pos]=='!'|| this->srcbuf[pos]=='/'){ 
    193201#ifdef DUMP 
    194202print_register<BitBlock>("srcbuf", bitblock::load_unaligned((BitBlock *) srcbuf)); 
     
    197205            exit(-1); 
    198206          } 
    199           mode = InFile; 
     207      this->mode = InFile; 
    200208        } 
    201209        for (blk = 0; blk < blk_counts; blk++) { 
    202                 scanword_t s = ((scanword_t*)tagMarks)[blk]; 
     210        scanword_t s = ((scanword_t*)this->tagMarks)[blk]; 
    203211                while(s) { 
    204212                        int code = tag_match(scan_forward_zeroes(s) + block_pos, chars_avail); 
     
    212220} 
    213221 
    214 void TagMatcher::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){ 
     222template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     223void TagMatcher<BUF_SIZE, OVER_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){ 
    215224#ifdef DUMP 
    216225print_register<BitBlock>("tagMark", tagMark); 
     
    220229printf("stream_index = %i\n", stream_index); 
    221230#endif 
    222   tagMarks[stream_index] = tagMark; 
    223   miscMarks[stream_index] = simd_not(miscMark); 
    224   NameFollows[stream_index] = NameFollow; 
    225   stream_index++; 
    226   if(stream_index==1){ 
    227  
    228     if (InFinalEndTag == 1){ 
     231  this->tagMarks[this->stream_index] = tagMark; 
     232  this->miscMarks[this->stream_index] = simd_not(miscMark); 
     233  this->NameFollows[this->stream_index] = NameFollow; 
     234  this->stream_index++; 
     235  if(this->stream_index==1){ 
     236 
     237    if (this->InFinalEndTag == 1){ 
    229238      int pos = -1; 
    230       while(srcbuf[pos]!='>'){ 
     239      while(this->srcbuf[pos]!='>'){ 
    231240        pos++; 
    232241        if(pos>=chars_avail){ 
    233           InFinalEndTag = 1; 
     242      this->InFinalEndTag = 1; 
    234243          return; 
    235244        } 
    236245      } 
    237       pos = bitstream_scan(miscMarks,pos+1); 
     246      pos = bitstream_scan(this->miscMarks,pos+1); 
    238247#ifdef DUMP 
    239248print_register<BitBlock>("miscMarks[0]", miscMarks[0]); 
     
    246255    } 
    247256 
    248     if(state == InStartTag) { 
    249       state = Clear; 
    250       int remain_lgth = bitstream_scan(NameFollows,0); 
    251       memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth); 
    252       tag_lgth_stack[depth] += remain_lgth; 
    253       depth++; 
    254     } 
    255     else if (state == InEndTag) { 
    256       state = Clear; 
    257       int lgth = tag_lgth_stack[depth]; 
    258       if (does_match(tag_stack[depth]+inTagPos,srcbuf,lgth-inTagPos) && ((srcbuf[lgth-inTagPos] == '>') ||(srcbuf[lgth-inTagPos] <= ' '))) return ; 
     257    if(this->state == InStartTag) { 
     258      this->state = this->Clear; 
     259      int remain_lgth = bitstream_scan(this->NameFollows,0); 
     260      memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth); 
     261      this->tag_lgth_stack[this->depth] += remain_lgth; 
     262      this->depth++; 
     263    } 
     264    else if (this->state == InEndTag) { 
     265      this->state = Clear; 
     266      int lgth = this->tag_lgth_stack[this->depth]; 
     267      if (does_match(this->tag_stack[this->depth]+this->inTagPos,this->srcbuf,lgth-this->inTagPos) && ((this->srcbuf[lgth-this->inTagPos] == '>') ||(this->srcbuf[lgth-this->inTagPos] <= ' '))) return ; 
    259268      else { 
    260269          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base); 
     
    262271      } 
    263272    } 
    264     else if (state == InAttName) { 
    265       state = Clear; 
    266       int remain_lgth = bitstream_scan(NameFollows,0); 
    267       memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth); 
    268       if(lookup_or_insert(InAtt.start, InAtt.lgth+remain_lgth)){ 
     273    else if (this->state == InAttName) { 
     274      this->state = Clear; 
     275      int remain_lgth = bitstream_scan(this->NameFollows,0); 
     276      memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth); 
     277      if(lookup_or_insert(this->InAtt.start, this->InAtt.lgth+remain_lgth)){ 
    269278              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base); 
    270279              exit(-1); 
     
    274283} 
    275284 
    // Constructor (pre-refactor, non-template version).
    // Zeroes stream_index, depth, buf_base and InFinalEndTag, and puts the
    // matcher in state Clear / mode StartOfFile.
    // tags_buf_cur and att_index are not set here; Advance_buffer() assigns them.
    276 TagMatcher::TagMatcher(){ 
    277   stream_index = 0; 
    278   depth = 0; 
    279   buf_base = 0; 
    280   state = Clear; 
    281   mode = StartOfFile; 
    282   InFinalEndTag = 0; 
    // Writes a sentinel block at index BUFFER_SIZE/BLOCK_SIZE of NameFollows.
    // Presumably this is one past the last data block and has every bit set so
    // that scans over NameFollows always stop -- TODO confirm (see original TODO).
    283   NameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel 
    284 } 
    285  
    286  
    287  
    // Destructor (pre-refactor version): empty body, performs no explicit cleanup.
    288 TagMatcher::~TagMatcher(){ 
    289  
    290 } 
    291  
    // Records the source buffer pointer used by the scanning routines.
    // Only the pointer is stored; the data is not copied.
    // NOTE(review): other members read srcbuf[pos-1] and Advance_buffer() writes
    // srcbuf[-1], so src presumably must have at least one writable byte in
    // front of it -- confirm against the caller.
    292 void TagMatcher::setSrc(char * src){ 
    293   srcbuf = src; 
    294 } 
    295  
    // Prepares the matcher for the next source buffer (pre-refactor version).
    // Advances buf_base by BUFFER_SIZE, resets the per-buffer counters, and copies
    // every name still referenced (open tags, plus a tag or attribute name that
    // is in progress according to `state`) into tags_buf so the pointers stay
    // valid after the source buffer is refilled.
    296 void TagMatcher::Advance_buffer(){ 
    297   buf_base += BUFFER_SIZE; 
    298   stream_index=0; 
    299   tags_buf_cur = 0; 
    300   att_index = 0; 
    // Pack the names of the `depth` open tags contiguously at the front of
    // tags_buf; a name already located at its target address is not copied.
    301   for(int i=0; i< depth; i++){ 
    302     if(&tags_buf[tags_buf_cur]!=tag_stack[i]) 
    303       memcpy(&tags_buf[tags_buf_cur],tag_stack[i],tag_lgth_stack[i]); 
    304     tag_stack[i] = &tags_buf[tags_buf_cur]; 
    305     tags_buf_cur += tag_lgth_stack[i]; 
    306   } 
    // Start tag in progress: save the part of the name seen so far, taken from
    // srcbuf at finalStartPos, as the entry at index `depth`.
    307   if(state == InStartTag) { 
    308       memcpy(&tags_buf[tags_buf_cur],&srcbuf[finalStartPos],tag_lgth_stack[depth]); 
    309       tag_stack[depth] = &tags_buf[tags_buf_cur]; 
    310       tags_buf_cur += tag_lgth_stack[depth]; 
    311   } 
    // End tag in progress: keep the name stored at index `depth` available.
    // NOTE(review): unlike the loop above there is no source != destination
    // check before memcpy here -- confirm the two regions can never coincide.
    312   else if(state == InEndTag) { 
    313      memcpy(&tags_buf[tags_buf_cur],tag_stack[depth],tag_lgth_stack[depth]); 
    314     tag_stack[depth] = &tags_buf[tags_buf_cur]; 
    315     tags_buf_cur += tag_lgth_stack[depth]; 
    316   } 
    // Attribute name in progress: save the part seen so far and repoint InAtt.start.
    317   else if(state == InAttName) { 
    318       memcpy(&tags_buf[tags_buf_cur],InAtt.start,InAtt.lgth); 
    319       InAtt.start = &tags_buf[tags_buf_cur]; 
    320       tags_buf_cur += InAtt.lgth; 
    321   } 
    // Carry the last byte of this buffer into the slot just before srcbuf, which
    // the scanning code reads as srcbuf[pos-1] when pos is 0.
    // NOTE(review): assumes writable storage exists at srcbuf[-1] -- confirm.
    322   srcbuf[-1] = srcbuf[BUFFER_SIZE-1]; 
    323 } 
     // Constructor of the templated TagMatcher.
     // Zeroes stream_index, depth, buf_base and InFinalEndTag, and puts the
     // matcher in state Clear / mode StartOfFile.
     // tags_buf_cur and att_index are not set here; Advance_buffer() assigns them.
     285template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     286TagMatcher<BUF_SIZE, OVER_SIZE>::TagMatcher(){ 
     287  this->stream_index = 0; 
     288  this->depth = 0; 
     289  this->buf_base = 0; 
     290  this->state = Clear; 
     291  this->mode = StartOfFile; 
     292  this->InFinalEndTag = 0; 
     // Writes a sentinel block at index BUF_SIZE/BLOCK_SIZE of NameFollows.
     // Presumably this is one past the last data block and has every bit set so
     // that scans over NameFollows always stop -- TODO confirm (see original TODO).
     293  this->NameFollows[BUF_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel 
     294} 
     295 
     // Destructor: empty body, performs no explicit cleanup.
     296template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     297TagMatcher<BUF_SIZE, OVER_SIZE>::~TagMatcher(){ 
     298 
     299} 
     300 
     // Records the source buffer pointer used by the scanning routines.
     // Only the pointer is stored; the data is not copied.
     // NOTE(review): other members read srcbuf[pos-1] and Advance_buffer() writes
     // srcbuf[-1], so src presumably must have at least one writable byte in
     // front of it -- confirm against the caller.
     301template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     302void TagMatcher<BUF_SIZE, OVER_SIZE>::setSrc(char * src){ 
     303  this->srcbuf = src; 
     304} 
     305 
     // Prepares the matcher for the next source buffer.
     // Advances buf_base by BUF_SIZE, resets the per-buffer counters, and copies
     // every name still referenced (open tags, plus a tag or attribute name that
     // is in progress according to `state`) into tags_buf so the pointers stay
     // valid after the source buffer is refilled.
     306template <uint64_t BUF_SIZE, uint64_t OVER_SIZE> 
     307void TagMatcher<BUF_SIZE, OVER_SIZE>::Advance_buffer(){ 
     308  this->buf_base += BUF_SIZE; 
     309  this->stream_index=0; 
     310  this->tags_buf_cur = 0; 
     311  this->att_index = 0; 
     // Pack the names of the `depth` open tags contiguously at the front of
     // tags_buf; a name already located at its target address is not copied.
     312  for(int i=0; i< this->depth; i++){ 
     313    if(&this->tags_buf[this->tags_buf_cur]!=this->tag_stack[i]) 
     314      memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[i],this->tag_lgth_stack[i]); 
     315    this->tag_stack[i] = &this->tags_buf[tags_buf_cur]; 
     316    this->tags_buf_cur += this->tag_lgth_stack[i]; 
     317  } 
     // Start tag in progress: save the part of the name seen so far, taken from
     // srcbuf at finalStartPos, as the entry at index `depth`.
     318  if(this->state == InStartTag) { 
     319      memcpy(&this->tags_buf[this->tags_buf_cur],&this->srcbuf[this->finalStartPos],this->tag_lgth_stack[this->depth]); 
     320      this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur]; 
     321      this->tags_buf_cur += this->tag_lgth_stack[this->depth]; 
     322  } 
     // End tag in progress: keep the name stored at index `depth` available.
     // NOTE(review): unlike the loop above there is no source != destination
     // check before memcpy here -- confirm the two regions can never coincide.
     323  else if(this->state == InEndTag) { 
     324     memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[this->depth],this->tag_lgth_stack[this->depth]); 
     325    this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur]; 
     326    this->tags_buf_cur += this->tag_lgth_stack[this->depth]; 
     327  } 
     // Attribute name in progress: save the part seen so far and repoint InAtt.start.
     328  else if(this->state == InAttName) { 
     329      memcpy(&this->tags_buf[this->tags_buf_cur],this->InAtt.start,this->InAtt.lgth); 
     330      this->InAtt.start = &this->tags_buf[tags_buf_cur]; 
     331      this->tags_buf_cur += this->InAtt.lgth; 
     332  } 
     // Carry the last byte of this buffer into the slot just before srcbuf, which
     // the scanning code reads as srcbuf[pos-1] when pos is 0.
     // NOTE(review): assumes writable storage exists at srcbuf[-1] -- confirm.
     333  this->srcbuf[-1] = this->srcbuf[BUF_SIZE-1]; 
     334} 
     335 
    324336 
    325337#endif /* TAGMATCHER_HPP_ */