Changeset 2160


Ignore:
Timestamp:
May 24, 2012, 8:12:42 PM (7 years ago)
Author:
ksherdy
Message:

Refactored pablo template.

Location:
proto/parabix2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/pablo_template.cpp

    r2155 r2160  
    1313#include "../lib/bitblock_iterator.hpp"
    1414#include "../lib/s2p.hpp"
    15 
    16 #define SEGMENT_BLOCKS 12
    17 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    18 #define OVERLAP_BUFSIZE (sizeof(BitBlock))
     15#include "../lib/perflib/perfsec.h"
    1916
    2017#include "xmldecl.h"
    2118#include "namechars.h"
    22 #include "../lib/perflib/perfsec.h"
    23 
    2419#include "TagMatcher.hpp"
    2520#include "LineColTracker.hpp"
     
    2924
    3025#ifdef BUFFER_PROFILING
    31         BOM_Table * parser_timer;
    32 
     26    BOM_Table * parser_timer;
    3327#elif CODE_CLOCKER
    34         #define NUM_EVENTS 1
    35         int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
    36         //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
    37         //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
    38         int cal_size = 20;
    39         CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
     28    //#define NUM_EVENTS 1
     29    //int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
     30    //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
     31    #define NUM_EVENTS 2
     32    int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
     33    int cal_size = 20;
     34    CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
    4035#else
    41         void * parser_timer;
     36    void * parser_timer;
    4237#endif
    4338
    44 int block_base=0;
    45 int buffer_base=0;
    46 char * source;
    47 
    48 LineColTracker tracker;
    49 TagMatcher matcher;
    5039ErrorTracker error_tracker;
    5140BitBlock EOF_mask = simd<1>::constant<1>();
    5241
    53 static inline int NameStrt_check(int pos);
    54 static inline int Name_check(int pos);
    55 static inline int PIName_check(int pos);
    56 static inline int CD_check(int pos);
    57 static inline int GenRef_check(int pos);
    58 static inline int HexRef_check(int pos);
    59 static inline int DecRef_check(int pos);
    60 static inline int AttRef_check(int pos);
    61 
     42//////////////////////////////////////////////////////////////////////////////////////////
     43// Buffer Management
     44//////////////////////////////////////////////////////////////////////////////////////////
     45#include "../lib/buffer.hpp"
     46
     47#define OVERLAP_BUFSIZE PADDING_SIZE //sizeof(BitBlock)
     48
     49//////////////////////////////////////////////////////////////////////////////////////////
     50// @ global depends on 'error_tracker' and 'EOF_mask' definitions.
     51//////////////////////////////////////////////////////////////////////////////////////////
    6252@global
    6353
    64 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    65 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    66 static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail);
    67 
    68 void do_process(FILE *infile, FILE *outfile);
    69 
    70 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    71 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
     54//////////////////////////////////////////////////////////////////////////////////////////
     55// Headers that depend @ global stream struct types.
     56//////////////////////////////////////////////////////////////////////////////////////////
     57#include "../lib/transpose.hpp"
     58#include "post_process.hpp"
     59
     60static void do_process(FILE *infile, FILE *outfile);
    7261
    7362int main(int argc, char * argv[]) {
     
    10089        }
    10190
    102 //      PERF_SEC_BIND(1);
     91        PERF_SEC_BIND(1);
    10392
    10493        PERF_SEC_INIT(parser_timer);
     
    116105}
    117106
    118 /* s2p Definitions */
    119 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    120   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    121         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    122 }
    123 
    124 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    125   s2p_do_block(U8, basis_bits);
    126   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    127   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    128   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    129   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    130   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    131   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    132   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    133   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    134 }
    135 
    136 
    137 static inline int NameStrt_check(int pos) {
    138         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    139               return XMLTestSuiteError::NAME_START;
    140         }
    141         return 0;
    142 }
    143 
    144 static inline int Name_check(int pos) {
    145         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    146                   return XMLTestSuiteError::NAME;
    147         }
    148         return 0;
    149 }
    150 
    151 static inline int PIName_check(int pos, int file_pos) {
    152         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    153               // "<?xml" legal at start of file.
    154               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    155                   return XMLTestSuiteError::XMLPINAME;
    156               }
    157         }
    158         return 0;
    159 }
    160 
    161 static inline int CD_check(int pos) {
    162         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    163                   return XMLTestSuiteError::CDATA;
    164         }
    165         return 0;
    166 }
    167 
    168 static inline int GenRef_check(int pos) {
    169         unsigned char* s = (unsigned char*)&source[pos];
    170         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    171               return XMLTestSuiteError::UNDEFREF;
    172         }
    173         return 0;
    174 }
    175 
    176 static inline int HexRef_check(int pos) {
    177         unsigned char* s = (unsigned char*)&source[pos];
    178         int ch_val = 0;
    179         while(at_HexDigit<ASCII>(s)){
    180           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    181           if (ch_val> 0x10FFFF ){
    182                 return XMLTestSuiteError::CHARREF;
    183           }
    184           s++;
    185         }
    186         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    187           return XMLTestSuiteError::CHARREF;
    188         }
    189         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    190           return XMLTestSuiteError::XML10CHARREF;
    191         }
    192         return 0;
    193 }
    194 
    195 static inline int DecRef_check(int pos) {
    196         unsigned char* s = (unsigned char*)&source[pos];
    197         int ch_val = 0;
    198         while(at_HexDigit<ASCII>(s)){
    199           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    200           if (ch_val> 0x10FFFF ){
    201                         return XMLTestSuiteError::CHARREF;
    202           }
    203           s++;
    204         }
    205         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    206                   return XMLTestSuiteError::CHARREF;
    207         }
    208         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    209                   return XMLTestSuiteError::XML10CHARREF;
    210         }
    211         return 0;
    212 }
    213 
    214 static inline int AttRef_check(int pos) {
    215         unsigned char* s = (unsigned char*)&source[pos];
    216         int ch_val = 0;
    217         if(s[0]=='#'){
    218           s++;
    219           if(s[0]=='x' || s[0]=='X'){
    220             s++;
    221             while(at_HexDigit<ASCII>(s)){
    222               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    223               s++;
    224             }
    225           }
    226           else{
    227             while(at_HexDigit<ASCII>(s)){
    228               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    229               s++;
    230             }
    231           }
    232           if (ch_val==60){
    233             return XMLTestSuiteError::ATTREF;
    234           }
    235         }
    236         else if(at_Ref_lt<ASCII>(s)){
    237           return XMLTestSuiteError::ATTREF;
    238         }
    239         return 0;
    240 }
    241 
    242 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    243 
    244         int pos, block_pos;
    245         BitBlockForwardIterator end;
    246         while(start != end) {
    247 
    248                 block_pos = block_base + *start;
    249                 int rv = is_valid(block_pos);
    250 
    251                 if (rv) {
    252                         int error_line, error_column;
    253                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    254                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    255                         exit(-1);
    256                 }
    257                 start++;
    258         }
    259 }
    260 
    261 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    262 
    263         int pos, block_pos, file_pos;
    264         BitBlockForwardIterator end;
    265         while(start != end) {
    266 
    267                 block_pos = block_base + *start;
    268                 file_pos = block_pos+buffer_base;
    269 
    270 
    271                 int rv = is_valid(block_pos, file_pos);
    272 
    273                 if (rv) {
    274                         int error_line, error_column;
    275                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    276                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    277                         exit(-1);
    278                 }
    279                 start++;
    280         }
    281 }
    282 
    283 static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail){
    284 BitBlockForwardIterator iter;
    285 
    286 tracker.StoreNewlines(lex.LF);
    287 
    288 if (bitblock::any(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    289         iter.init(&check_streams.non_ascii_name_starts);
    290         validate_block(iter, block_base, NameStrt_check);
    291         iter.init(&check_streams.non_ascii_names);
    292         validate_block(iter, block_base, Name_check);
    293 }
    294 if (bitblock::any(ctCDPI_Callouts.PI_name_starts)){
    295         iter.init(&(ctCDPI_Callouts.PI_name_starts));
    296         validate_block(iter, block_base, buffer_base, PIName_check);
    297 }
    298 if (bitblock::any(ctCDPI_Callouts.CD_starts)){
    299         iter.init(&ctCDPI_Callouts.CD_starts);
    300         validate_block(iter, block_base, CD_check);
    301 }
    302 if(bitblock::any(ref_Callouts.GenRef_starts)){
    303         iter.init(&ref_Callouts.GenRef_starts);
    304         validate_block(iter, block_base, GenRef_check);
    305 }
    306 if(bitblock::any(ref_Callouts.DecRef_starts)){
    307         iter.init(&ref_Callouts.DecRef_starts);
    308         validate_block(iter, block_base, DecRef_check);
    309 }
    310 if(bitblock::any(ref_Callouts.HexRef_starts)){
    311         iter.init(&ref_Callouts.HexRef_starts);
    312         validate_block(iter, block_base, HexRef_check);
    313 }
    314 if(bitblock::any(check_streams.att_refs)){
    315         iter.init(&check_streams.att_refs);
    316         validate_block(iter, block_base, AttRef_check);
    317 }
    318 
    319 if(error_tracker.Has_Noted_Error()){
    320         int error_line, error_column;
    321         tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
    322         ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
    323         exit(-1);
    324 }
    325 
    326 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    327 tracker.AdvanceBlock();
    328 
    329 }
    330 
    331107void do_process(FILE *infile, FILE *outfile) {
    332108
    333 @decl
    334 
    335   int buf_pos = 0;
    336   int block_pos = 0;
    337   int chars_avail = 0;
    338   int check_pos = 0;
    339   int chars_read = 0;
    340   BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(BitBlock)];
    341 
    342   char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
    343   buffer_base = buf_pos;
    344   source = srcbuf;
    345 
    346   chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
    347   chars_avail = chars_read;
    348   if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    349 
    350   matcher.setSrc(srcbuf);
    351 
    352   if(chars_read<4){
     109    @decl
     110
     111    LineColTracker tracker;
     112    TagMatcher<SEGMENT_SIZE,OVERLAP_BUFSIZE> matcher;
     113
     114    uint8_t * src_buf;
     115    int block_base=0;
     116    int buffer_base=0;
     117    int buffer_pos = 0;
     118    int block_pos = 0;
     119    int chars_avail = 0;
     120    int check_pos = 0;
     121    int chars_read = 0;
     122
     123    //////////////////////////////////////////////////////////////////////////////////////////
     124    // Buffer Management
     125    //////////////////////////////////////////////////////////////////////////////////////////
     126    BitBlock buf[(PADDING_SIZE + SEGMENT_SIZE + PADDING_SIZE)/sizeof(BitBlock)];
     127    src_buf = (uint8_t *)buf + PADDING_SIZE;
     128
     129    //ALLOC_STATIC_ALIGNED_BYTE_BUFFER(src_buf, (PADDING_SIZE + SEGMENT_SIZE + PADDING_SIZE));
     130
     131    buffer_base = buffer_pos;
     132    chars_read = fread((void *)src_buf, 1, BUFFER_SIZE, infile);
     133    chars_avail = chars_read;
     134    if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     135
     136    //////////////////////////////////////////////////////////////////////////////////////////
     137    // XML Validation / Content Model
     138    //////////////////////////////////////////////////////////////////////////////////////////
     139    if(chars_read<4){
    353140    fprintf(stderr,"File is too short. Not well formed.\n");
    354141    exit(-1);
    355   }
    356 
    357   Entity_Info * e = new Entity_Info;
    358   e->AnalyzeSignature((unsigned char *)srcbuf);
    359 
    360   if (e->code_unit_base == ASCII) {
    361 
    362     XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
     142    }
     143
     144    Entity_Info * e = new Entity_Info;
     145    e->AnalyzeSignature((unsigned char *)src_buf);
     146
     147    if (e->code_unit_base == ASCII) {
     148
     149    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)src_buf);
    363150
    364151    decl_parser.ReadXMLInfo(*e);
    365152
    366153    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
    367         fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
    368         exit(-1);
    369     }
    370   }
    371   else {
    372     fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
    373         exit(-1);
    374   }
    375 
    376   if (e->content_start != 0) {
    377         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    378         buf_pos = e->content_start;
    379         if (chars_avail == BUFFER_SIZE) {
    380                 chars_read = chars_read - e->content_start +
    381                              fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    382                 chars_avail = chars_read;
    383                 if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    384         }
    385         else {
    386           chars_read -=e->content_start;
    387           chars_avail -=e->content_start;
    388         }
    389   }
    390 
    391 @stream_stmts
    392 
    393 /* Full Buffers */
    394 
    395     while (chars_avail == BUFFER_SIZE) {
     154        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
     155        exit(-1);
     156    }
     157    }
     158    else {
     159        fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
     160        exit(-1);
     161    }
     162
     163    if (e->content_start != 0) {
     164    memmove(&src_buf[0], &src_buf[e->content_start], chars_read - e->content_start);
     165    buffer_pos = e->content_start;
     166    if (chars_avail == SEGMENT_SIZE) {
     167        chars_read = chars_read - e->content_start + fread(&src_buf[chars_read-e->content_start], 1, e->content_start, infile);
     168        chars_avail = chars_read;
     169        if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     170    }
     171    else {
     172      chars_read -=e->content_start;
     173      chars_avail -=e->content_start;
     174    }
     175    }
     176
     177    @stream_stmts
     178
     179
     180    //////////////////////////////////////////////////////////////////////////////////////////
     181    // Full Segments
     182    //////////////////////////////////////////////////////////////////////////////////////////
     183    matcher.setSrc((char *)src_buf);
     184    while (chars_avail == SEGMENT_SIZE) {
    396185      PERF_SEC_START(parser_timer);
    397186      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    398187          block_base = blk*BLOCK_SIZE;
    399           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     188          s2p_do_block((BytePack *) &src_buf[block_base], basis_bits);
     189
    400190          @block_stmts
    401           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
     191
     192          tracker.StoreNewlines(lex.LF);
     193          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
     194          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     195          tracker.AdvanceBlock();
    402196      }
    403197      matcher.StreamScan(chars_avail);
     
    406200
    407201      int bytes_left = chars_read - chars_avail;
    408       memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
    409       chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
     202      memmove(src_buf, &src_buf[SEGMENT_SIZE], bytes_left);
     203      chars_read = fread(&src_buf[bytes_left], 1, BUFFER_SIZE - bytes_left, infile) + bytes_left;
    410204      chars_avail = chars_read;
    411       if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    412       buf_pos += chars_avail;
    413       buffer_base = buf_pos;
    414     }
    415 /* Final Partial Buffer */
     205      if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     206      buffer_pos += chars_avail;
     207      buffer_base = buffer_pos;
     208    }
     209
     210    //////////////////////////////////////////////////////////////////////////////////////////
     211    // Final Partial Segment
     212    //////////////////////////////////////////////////////////////////////////////////////////
    416213    PERF_SEC_START(parser_timer);
    417214
    418215    block_pos = 0;
    419216    int remaining = chars_avail;
    420 /* Full Blocks */
     217
     218    /* Full Blocks */
    421219    while (remaining >= BLOCK_SIZE) {
    422           block_base = block_pos;
    423           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     220          block_base = block_pos;
     221          s2p_do_block((BytePack *) &src_buf[block_pos], basis_bits);
    424222          @block_stmts
    425           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    426           block_pos += BLOCK_SIZE;
     223          tracker.StoreNewlines(lex.LF);
     224          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
     225          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     226          tracker.AdvanceBlock();
     227          block_pos += BLOCK_SIZE;
    427228          remaining -= BLOCK_SIZE;
    428229    }
    429230    block_base = block_pos;
     231
     232    /* Partial Block or Any Carry */
    430233    if (remaining > 0 || @any_carry) {
    431234          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    432           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     235          s2p_do_final_block((BytePack *) &src_buf[block_pos], basis_bits, EOF_mask);
    433236          @final_block_stmts
    434           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    435     }
    436     buf_pos += chars_avail;
    437     buffer_base = buf_pos;
     237          tracker.StoreNewlines(lex.LF);
     238          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
     239          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     240          tracker.AdvanceBlock();
     241
     242    }
     243
     244    buffer_pos += chars_avail;
     245    buffer_base = buffer_pos;
    438246
    439247    matcher.StreamScan(chars_avail);
    440248    matcher.Advance_buffer();
    441 
    442249
    443250    PERF_SEC_END(parser_timer, chars_avail);
  • proto/parabix2/src/TagMatcher.hpp

    r2155 r2160  
    44#include "../lib/bitblock.hpp"
    55
    6 #define MAX_DEPTH 100
    76#include <algorithm>
    87#include <iostream>
     
    109using namespace std;
    1110
    12 
    1311#define MAX_DEPTH 100
    1412#define MAX_ATTS 100
     
    1917};
    2018
     19template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    2120class TagMatcher {
    22   public:
    23   BitBlock tagMarks[BUFFER_SIZE/BLOCK_SIZE];
    24   BitBlock miscMarks[BUFFER_SIZE/BLOCK_SIZE];
    25   char tags_buf[BUFFER_SIZE];
    26   int tags_buf_cur;
    27   int stream_index;
    28   char * srcbuf;
    29   int depth;
    30   int inTagPos;
    31   int finalStartPos;
    32   char* tag_stack[MAX_DEPTH];
    33   int tag_lgth_stack[MAX_DEPTH];
    34   BitBlock NameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
    35   int buf_base;
    36   enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
    37   enum TagMatchMode {StartOfFile, InFile} mode;
    38   struct attribute Attr[MAX_ATTS];
    39   struct attribute InAtt;
    40   int att_index;
    41   int InFinalEndTag;
    42 
     21
     22public:
    4323  TagMatcher();
    4424  ~TagMatcher();
     
    5030  int does_match(char * s1, char * s2, int lgth);
    5131  int lookup_or_insert(char*s, int lgth);
     32
     33    int depth;
     34
     35private:
     36  BitBlock tagMarks[BUF_SIZE/BLOCK_SIZE];
     37  BitBlock miscMarks[BUF_SIZE/BLOCK_SIZE];
     38  char tags_buf[BUF_SIZE];
     39  int tags_buf_cur;
     40  int stream_index;
     41  char * srcbuf;
     42
     43  int inTagPos;
     44  int finalStartPos;
     45  char* tag_stack[MAX_DEPTH];
     46  int tag_lgth_stack[MAX_DEPTH];
     47  BitBlock NameFollows[BUF_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
     48  int buf_base;
     49  enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
     50  enum TagMatchMode {StartOfFile, InFile} mode;
     51  struct attribute Attr[MAX_ATTS];
     52  struct attribute InAtt;
     53  int att_index;
     54  int InFinalEndTag;
     55
    5256};
    5357
    54 int TagMatcher::lookup_or_insert(char* s, int lgth){
    55   for(int i=0; i< att_index; i++)
    56     if(lgth == Attr[i].lgth &&  does_match(s,Attr[i].start,lgth))
    57       return 1;
    58 
    59   Attr[att_index].start = s;
    60   Attr[att_index].lgth = lgth;
    61   att_index++;
    62   return 0;
    63 }
    64 
    65 int TagMatcher::does_match(char * s1, char * s2, int lgth){
     58template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     59int TagMatcher<BUF_SIZE, OVER_SIZE>::does_match(char * s1, char * s2, int lgth){
    6660    int matchlen = 0;
    6761    int i=0;
     
    6963      /* full 16 byte match */
    7064
    71       if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]), 
    72                                 bitblock::load_unaligned((BitBlock*)&s2[i])))) {
    73                 return 0;
     65      if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]),
     66                bitblock::load_unaligned((BitBlock*)&s2[i])))) {
     67        return 0;
    7468      }
    7569      else {
    76                 lgth -= sizeof(BitBlock);
    77                 i +=sizeof(BitBlock);
    78       }
    79     }
    80 
    81         scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
     70        lgth -= sizeof(BitBlock);
     71        i +=sizeof(BitBlock);
     72      }
     73    }
     74
     75    scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
    8276                                                  bitblock::load_unaligned((BitBlock*)&s2[i])));
    8377
    84         return lgth <= scan_forward_zeroes(temp);
    85 }
    86 
    87 
    88 int TagMatcher:: tag_match(int pos, int chars_avail) {
     78    return lgth <= scan_forward_zeroes(temp);
     79}
     80
     81
     82template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     83int TagMatcher<BUF_SIZE, OVER_SIZE>::lookup_or_insert(char* s, int lgth){
     84  for(int i=0; i< this->att_index; i++)
     85    if(lgth == this->Attr[i].lgth &&  this->does_match(s,this->Attr[i].start,lgth))
     86      return 1;
     87
     88  this->Attr[att_index].start = s;
     89  this->Attr[att_index].lgth = lgth;
     90  this->att_index++;
     91  return 0;
     92}
     93
     94
     95template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     96int TagMatcher<BUF_SIZE, OVER_SIZE>:: tag_match(int pos, int chars_avail) {
    8997        int rt_val=0;
    9098//      end tag
    91         if(srcbuf[pos]=='/' ){
     99    if(this->srcbuf[pos]=='/' ){
    92100          pos++;
    93           depth--;
    94           if (depth<0)
     101      this->depth--;
     102      if (this->depth<0)
    95103            return pos;
    96           int lgth = tag_lgth_stack[depth];
    97 
    98           if (does_match(tag_stack[depth],&srcbuf[pos],lgth) && ((srcbuf[pos+lgth] == '>') ||(srcbuf[pos+lgth] <= ' '))) rt_val=0;
    99           else if (pos + lgth >= BUFFER_SIZE + OVERLAP_BUFSIZE) {
    100             state = InEndTag;
    101             inTagPos = BUFFER_SIZE - pos;
     104      int lgth = this->tag_lgth_stack[depth];
     105
     106      if (does_match(this->tag_stack[depth],&this->srcbuf[pos],lgth) && ((this->srcbuf[pos+lgth] == '>') ||(this->srcbuf[pos+lgth] <= ' '))) rt_val=0;
     107      else if (pos + lgth >= BUF_SIZE + OVER_SIZE) {
     108        this->state = InEndTag;
     109        this-> inTagPos = BUF_SIZE - pos;
    102110            rt_val=0;
    103111          }
     
    109117          }
    110118
    111           if (depth == 0){
    112             while(srcbuf[pos]!='>'){
     119      if (this->depth == 0){
     120        while(this->srcbuf[pos]!='>'){
    113121              pos++;
    114122              if(pos>=chars_avail){
    115                 InFinalEndTag = 1;
     123        this->InFinalEndTag = 1;
    116124                return 0;
    117125              }
    118126            }
    119             pos = bitstream_scan(miscMarks,pos+1);
     127        pos = bitstream_scan(this->miscMarks,pos+1);
     128            if(pos!=chars_avail){
     129          fprintf(stderr,"illegal content after root element at position = %i\n",this->buf_base+pos);
     130              exit(-1);
     131            }
     132          }
     133          return rt_val;
     134        }
     135//      empty tag
     136    else if(this->srcbuf[pos]=='>'){
     137      this->depth--;
     138      if (this->depth == 0){
     139        while(this->srcbuf[pos]!='>')
     140              pos++;
     141        pos = bitstream_scan(this->miscMarks,pos+1);
     142
    120143            if(pos!=chars_avail){
    121144              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
     
    123146            }
    124147          }
    125           return rt_val;
    126         }
    127 //      empty tag
    128         else if(srcbuf[pos]=='>'){
    129           depth--;
    130           if (depth == 0){
    131             while(srcbuf[pos]!='>')
    132               pos++;
    133             pos = bitstream_scan(miscMarks,pos+1);
    134 
    135             if(pos!=chars_avail){
    136               fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
    137               exit(-1);
    138             }
    139           }
    140148        }
    141149//      start tag
    142         else if(srcbuf[pos-1]=='<'){
    143           att_index = 0;
    144           if(depth<MAX_DEPTH){
    145             int end_pos = bitstream_scan(NameFollows,pos);
    146             tag_lgth_stack[depth] = end_pos-pos;
    147             tag_stack[depth] = &srcbuf[pos];
    148             if(end_pos<BUFFER_SIZE){
    149               depth++;
     150    else if(this->srcbuf[pos-1]=='<'){
     151      this->att_index = 0;
     152      if(this->depth<MAX_DEPTH){
     153        int end_pos = bitstream_scan(this->NameFollows,pos);
     154        this->tag_lgth_stack[this->depth] = end_pos-pos;
     155        this->tag_stack[depth] = &this->srcbuf[pos];
     156        if(end_pos<BUF_SIZE){
     157         this->depth++;
    150158            }
    151159            else{
    152               state = InStartTag;
    153               finalStartPos = pos;
     160          this->state = InStartTag;
     161          this->finalStartPos = pos;
    154162            }
    155163          }
    156164          else{
    157             fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",buf_base+pos, depth);
     165        fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",this->buf_base+pos, this->depth);
    158166            exit(-1);
    159167          }
     
    161169//      attribute
    162170        else{
    163           int end_pos = bitstream_scan(NameFollows,pos);
    164           if(end_pos<BUFFER_SIZE){
    165             if(lookup_or_insert(&srcbuf[pos], end_pos-pos)){
    166               fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base+pos);
     171      int end_pos = bitstream_scan(this->NameFollows,pos);
     172      if(end_pos<BUF_SIZE){
     173        if(lookup_or_insert(&this->srcbuf[pos], end_pos-pos)){
     174          fprintf(stderr,"Attribute name is not unique at position =%i.\n",this->buf_base+pos);
    167175              exit(-1);
    168176            }
    169177          }
    170178          else{
    171             state = InAttName;
    172             InAtt.start = &srcbuf[pos];
    173             InAtt.lgth = BUFFER_SIZE-pos;
     179        this->state = InAttName;
     180        this->InAtt.start = &this->srcbuf[pos];
     181        this->InAtt.lgth = BUF_SIZE-pos;
    174182          }
    175183        }
     
    177185}
    178186
    179 
    180 int TagMatcher::StreamScan(int chars_avail) {
     187template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     188int TagMatcher<BUF_SIZE, OVER_SIZE>::StreamScan(int chars_avail) {
    181189
    182190        int blk;
     
    184192        int block_pos = 0;
    185193
    186         if(mode == StartOfFile){
     194    if(this->mode == StartOfFile){
    187195          int pos = bitstream_scan(miscMarks,0);
    188196          if (pos==chars_avail){
    189             fprintf(stderr,"no element at position =%i.\n",buf_base+pos);
     197        fprintf(stderr,"no element at position =%i.\n",this->buf_base+pos);
    190198            exit(-1);
    191199          }
    192           if(srcbuf[pos-1]!='<'|| srcbuf[pos]=='!'||srcbuf[pos]=='/'){
     200      if(this->srcbuf[pos-1]!='<'|| this->srcbuf[pos]=='!'|| this->srcbuf[pos]=='/'){
    193201#ifdef DUMP
    194202print_register<BitBlock>("srcbuf", bitblock::load_unaligned((BitBlock *) srcbuf));
     
    197205            exit(-1);
    198206          }
    199           mode = InFile;
     207      this->mode = InFile;
    200208        }
    201209        for (blk = 0; blk < blk_counts; blk++) {
    202                 scanword_t s = ((scanword_t*)tagMarks)[blk];
     210        scanword_t s = ((scanword_t*)this->tagMarks)[blk];
    203211                while(s) {
    204212                        int code = tag_match(scan_forward_zeroes(s) + block_pos, chars_avail);
     
    212220}
    213221
    214 void TagMatcher::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
     222template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     223void TagMatcher<BUF_SIZE, OVER_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
    215224#ifdef DUMP
    216225print_register<BitBlock>("tagMark", tagMark);
     
    220229printf("stream_index = %i\n", stream_index);
    221230#endif
    222   tagMarks[stream_index] = tagMark;
    223   miscMarks[stream_index] = simd_not(miscMark);
    224   NameFollows[stream_index] = NameFollow;
    225   stream_index++;
    226   if(stream_index==1){
    227 
    228     if (InFinalEndTag == 1){
     231  this->tagMarks[this->stream_index] = tagMark;
     232  this->miscMarks[this->stream_index] = simd_not(miscMark);
     233  this->NameFollows[this->stream_index] = NameFollow;
     234  this->stream_index++;
     235  if(this->stream_index==1){
     236
     237    if (this->InFinalEndTag == 1){
    229238      int pos = -1;
    230       while(srcbuf[pos]!='>'){
     239      while(this->srcbuf[pos]!='>'){
    231240        pos++;
    232241        if(pos>=chars_avail){
    233           InFinalEndTag = 1;
     242      this->InFinalEndTag = 1;
    234243          return;
    235244        }
    236245      }
    237       pos = bitstream_scan(miscMarks,pos+1);
     246      pos = bitstream_scan(this->miscMarks,pos+1);
    238247#ifdef DUMP
    239248print_register<BitBlock>("miscMarks[0]", miscMarks[0]);
     
    246255    }
    247256
    248     if(state == InStartTag) {
    249       state = Clear;
    250       int remain_lgth = bitstream_scan(NameFollows,0);
    251       memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
    252       tag_lgth_stack[depth] += remain_lgth;
    253       depth++;
    254     }
    255     else if (state == InEndTag) {
    256       state = Clear;
    257       int lgth = tag_lgth_stack[depth];
    258       if (does_match(tag_stack[depth]+inTagPos,srcbuf,lgth-inTagPos) && ((srcbuf[lgth-inTagPos] == '>') ||(srcbuf[lgth-inTagPos] <= ' '))) return ;
     257    if(this->state == InStartTag) {
     258      this->state = this->Clear;
     259      int remain_lgth = bitstream_scan(this->NameFollows,0);
     260      memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
     261      this->tag_lgth_stack[this->depth] += remain_lgth;
     262      this->depth++;
     263    }
     264    else if (this->state == InEndTag) {
     265      this->state = Clear;
     266      int lgth = this->tag_lgth_stack[this->depth];
     267      if (does_match(this->tag_stack[this->depth]+this->inTagPos,this->srcbuf,lgth-this->inTagPos) && ((this->srcbuf[lgth-this->inTagPos] == '>') ||(this->srcbuf[lgth-this->inTagPos] <= ' '))) return ;
    259268      else {
    260269          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base);
     
    262271      }
    263272    }
    264     else if (state == InAttName) {
    265       state = Clear;
    266       int remain_lgth = bitstream_scan(NameFollows,0);
    267       memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
    268       if(lookup_or_insert(InAtt.start, InAtt.lgth+remain_lgth)){
     273    else if (this->state == InAttName) {
     274      this->state = Clear;
     275      int remain_lgth = bitstream_scan(this->NameFollows,0);
     276      memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
     277      if(lookup_or_insert(this->InAtt.start, this->InAtt.lgth+remain_lgth)){
    269278              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base);
    270279              exit(-1);
     
    274283}
    275284
    276 TagMatcher::TagMatcher(){
    277   stream_index = 0;
    278   depth = 0;
    279   buf_base = 0;
    280   state = Clear;
    281   mode = StartOfFile;
    282   InFinalEndTag = 0;
    283   NameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
    284 }
    285 
    286 
    287 
    288 TagMatcher::~TagMatcher(){
    289 
    290 }
    291 
    292 void TagMatcher::setSrc(char * src){
    293   srcbuf = src;
    294 }
    295 
    296 void TagMatcher::Advance_buffer(){
    297   buf_base += BUFFER_SIZE;
    298   stream_index=0;
    299   tags_buf_cur = 0;
    300   att_index = 0;
    301   for(int i=0; i< depth; i++){
    302     if(&tags_buf[tags_buf_cur]!=tag_stack[i])
    303       memcpy(&tags_buf[tags_buf_cur],tag_stack[i],tag_lgth_stack[i]);
    304     tag_stack[i] = &tags_buf[tags_buf_cur];
    305     tags_buf_cur += tag_lgth_stack[i];
    306   }
    307   if(state == InStartTag) {
    308       memcpy(&tags_buf[tags_buf_cur],&srcbuf[finalStartPos],tag_lgth_stack[depth]);
    309       tag_stack[depth] = &tags_buf[tags_buf_cur];
    310       tags_buf_cur += tag_lgth_stack[depth];
    311   }
    312   else if(state == InEndTag) {
    313      memcpy(&tags_buf[tags_buf_cur],tag_stack[depth],tag_lgth_stack[depth]);
    314     tag_stack[depth] = &tags_buf[tags_buf_cur];
    315     tags_buf_cur += tag_lgth_stack[depth];
    316   }
    317   else if(state == InAttName) {
    318       memcpy(&tags_buf[tags_buf_cur],InAtt.start,InAtt.lgth);
    319       InAtt.start = &tags_buf[tags_buf_cur];
    320       tags_buf_cur += InAtt.lgth;
    321   }
    322   srcbuf[-1] = srcbuf[BUFFER_SIZE-1];
    323 }
     285template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     286TagMatcher<BUF_SIZE, OVER_SIZE>::TagMatcher(){
     287  this->stream_index = 0;
     288  this->depth = 0;
     289  this->buf_base = 0;
     290  this->state = Clear;
     291  this->mode = StartOfFile;
     292  this->InFinalEndTag = 0;
     293  this->NameFollows[BUF_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
     294}
     295
     296template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     297TagMatcher<BUF_SIZE, OVER_SIZE>::~TagMatcher(){
     298
     299}
     300
     301template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     302void TagMatcher<BUF_SIZE, OVER_SIZE>::setSrc(char * src){
     303  this->srcbuf = src;
     304}
     305
     306template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     307void TagMatcher<BUF_SIZE, OVER_SIZE>::Advance_buffer(){
     308  this->buf_base += BUF_SIZE;
     309  this->stream_index=0;
     310  this->tags_buf_cur = 0;
     311  this->att_index = 0;
     312  for(int i=0; i< this->depth; i++){
     313    if(&this->tags_buf[this->tags_buf_cur]!=this->tag_stack[i])
     314      memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[i],this->tag_lgth_stack[i]);
     315    this->tag_stack[i] = &this->tags_buf[tags_buf_cur];
     316    this->tags_buf_cur += this->tag_lgth_stack[i];
     317  }
     318  if(this->state == InStartTag) {
     319      memcpy(&this->tags_buf[this->tags_buf_cur],&this->srcbuf[this->finalStartPos],this->tag_lgth_stack[this->depth]);
     320      this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
     321      this->tags_buf_cur += this->tag_lgth_stack[this->depth];
     322  }
     323  else if(this->state == InEndTag) {
     324     memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[this->depth],this->tag_lgth_stack[this->depth]);
     325    this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
     326    this->tags_buf_cur += this->tag_lgth_stack[this->depth];
     327  }
     328  else if(this->state == InAttName) {
     329      memcpy(&this->tags_buf[this->tags_buf_cur],this->InAtt.start,this->InAtt.lgth);
     330      this->InAtt.start = &this->tags_buf[tags_buf_cur];
     331      this->tags_buf_cur += this->InAtt.lgth;
     332  }
     333  this->srcbuf[-1] = this->srcbuf[BUF_SIZE-1];
     334}
     335
    324336
    325337#endif /* TAGMATCHER_HPP_ */
Note: See TracChangeset for help on using the changeset viewer.