Ignore:
Timestamp:
May 24, 2012, 6:51:24 PM (7 years ago)
Author:
ksherdy
Message:

Reverted to r2142.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/src/TagMatcher.hpp

    r2147 r2155  
    44#include "../lib/bitblock.hpp"
    55
     6#define MAX_DEPTH 100
    67#include <algorithm>
    78#include <iostream>
     
    910using namespace std;
    1011
     12
    1113#define MAX_DEPTH 100
    1214#define MAX_ATTS 100
     
    1719};
    1820
    19 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    2021class TagMatcher {
    21 
    22 public:
     22  public:
     23  BitBlock tagMarks[BUFFER_SIZE/BLOCK_SIZE];
     24  BitBlock miscMarks[BUFFER_SIZE/BLOCK_SIZE];
     25  char tags_buf[BUFFER_SIZE];
     26  int tags_buf_cur;
     27  int stream_index;
     28  char * srcbuf;
     29  int depth;
     30  int inTagPos;
     31  int finalStartPos;
     32  char* tag_stack[MAX_DEPTH];
     33  int tag_lgth_stack[MAX_DEPTH];
     34  BitBlock NameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
     35  int buf_base;
     36  enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
     37  enum TagMatchMode {StartOfFile, InFile} mode;
     38  struct attribute Attr[MAX_ATTS];
     39  struct attribute InAtt;
     40  int att_index;
     41  int InFinalEndTag;
     42
    2343  TagMatcher();
    2444  ~TagMatcher();
     
    3050  int does_match(char * s1, char * s2, int lgth);
    3151  int lookup_or_insert(char*s, int lgth);
    32 
    33     int depth;
    34 
    35 private:
    36   BitBlock tagMarks[BUF_SIZE/BLOCK_SIZE];
    37   BitBlock miscMarks[BUF_SIZE/BLOCK_SIZE];
    38   char tags_buf[BUF_SIZE];
    39   int tags_buf_cur;
    40   int stream_index;
    41   char * srcbuf;
    42 
    43   int inTagPos;
    44   int finalStartPos;
    45   char* tag_stack[MAX_DEPTH];
    46   int tag_lgth_stack[MAX_DEPTH];
    47   BitBlock NameFollows[BUF_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
    48   int buf_base;
    49   enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
    50   enum TagMatchMode {StartOfFile, InFile} mode;
    51   struct attribute Attr[MAX_ATTS];
    52   struct attribute InAtt;
    53   int att_index;
    54   int InFinalEndTag;
    55 
    5652};
    5753
    58 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    59 int TagMatcher<BUF_SIZE, OVER_SIZE>::does_match(char * s1, char * s2, int lgth){
     54int TagMatcher::lookup_or_insert(char* s, int lgth){
     55  for(int i=0; i< att_index; i++)
     56    if(lgth == Attr[i].lgth &&  does_match(s,Attr[i].start,lgth))
     57      return 1;
     58
     59  Attr[att_index].start = s;
     60  Attr[att_index].lgth = lgth;
     61  att_index++;
     62  return 0;
     63}
     64
     65int TagMatcher::does_match(char * s1, char * s2, int lgth){
    6066    int matchlen = 0;
    6167    int i=0;
     
    6369      /* full 16 byte match */
    6470
    65       if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]),
    66                 bitblock::load_unaligned((BitBlock*)&s2[i])))) {
    67         return 0;
     71      if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]), 
     72                                bitblock::load_unaligned((BitBlock*)&s2[i])))) {
     73                return 0;
    6874      }
    6975      else {
    70         lgth -= sizeof(BitBlock);
    71         i +=sizeof(BitBlock);
    72       }
    73     }
    74 
    75     scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
     76                lgth -= sizeof(BitBlock);
     77                i +=sizeof(BitBlock);
     78      }
     79    }
     80
     81        scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
    7682                                                  bitblock::load_unaligned((BitBlock*)&s2[i])));
    7783
    78     return lgth <= scan_forward_zeroes(temp);
    79 }
    80 
    81 
    82 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    83 int TagMatcher<BUF_SIZE, OVER_SIZE>::lookup_or_insert(char* s, int lgth){
    84   for(int i=0; i< this->att_index; i++)
    85     if(lgth == this->Attr[i].lgth &&  this->does_match(s,this->Attr[i].start,lgth))
    86       return 1;
    87 
    88   this->Attr[att_index].start = s;
    89   this->Attr[att_index].lgth = lgth;
    90   this->att_index++;
    91   return 0;
    92 }
    93 
    94 
    95 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    96 int TagMatcher<BUF_SIZE, OVER_SIZE>:: tag_match(int pos, int chars_avail) {
     84        return lgth <= scan_forward_zeroes(temp);
     85}
     86
     87
     88int TagMatcher:: tag_match(int pos, int chars_avail) {
    9789        int rt_val=0;
    9890//      end tag
    99     if(this->srcbuf[pos]=='/' ){
     91        if(srcbuf[pos]=='/' ){
    10092          pos++;
    101       this->depth--;
    102       if (this->depth<0)
     93          depth--;
     94          if (depth<0)
    10395            return pos;
    104       int lgth = this->tag_lgth_stack[depth];
    105 
    106       if (does_match(this->tag_stack[depth],&this->srcbuf[pos],lgth) && ((this->srcbuf[pos+lgth] == '>') ||(this->srcbuf[pos+lgth] <= ' '))) rt_val=0;
    107       else if (pos + lgth >= BUF_SIZE + OVER_SIZE) {
    108         this->state = InEndTag;
    109         this-> inTagPos = BUF_SIZE - pos;
     96          int lgth = tag_lgth_stack[depth];
     97
     98          if (does_match(tag_stack[depth],&srcbuf[pos],lgth) && ((srcbuf[pos+lgth] == '>') ||(srcbuf[pos+lgth] <= ' '))) rt_val=0;
     99          else if (pos + lgth >= BUFFER_SIZE + OVERLAP_BUFSIZE) {
     100            state = InEndTag;
     101            inTagPos = BUFFER_SIZE - pos;
    110102            rt_val=0;
    111103          }
     
    117109          }
    118110
    119       if (this->depth == 0){
    120         while(this->srcbuf[pos]!='>'){
     111          if (depth == 0){
     112            while(srcbuf[pos]!='>'){
    121113              pos++;
    122114              if(pos>=chars_avail){
    123         this->InFinalEndTag = 1;
     115                InFinalEndTag = 1;
    124116                return 0;
    125117              }
    126118            }
    127         pos = bitstream_scan(this->miscMarks,pos+1);
    128             if(pos!=chars_avail){
    129           fprintf(stderr,"illegal content after root element at position = %i\n",this->buf_base+pos);
    130               exit(-1);
    131             }
    132           }
    133           return rt_val;
    134         }
    135 //      empty tag
    136     else if(this->srcbuf[pos]=='>'){
    137       this->depth--;
    138       if (this->depth == 0){
    139         while(this->srcbuf[pos]!='>')
    140               pos++;
    141         pos = bitstream_scan(this->miscMarks,pos+1);
    142 
     119            pos = bitstream_scan(miscMarks,pos+1);
    143120            if(pos!=chars_avail){
    144121              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
     
    146123            }
    147124          }
     125          return rt_val;
     126        }
     127//      empty tag
     128        else if(srcbuf[pos]=='>'){
     129          depth--;
     130          if (depth == 0){
     131            while(srcbuf[pos]!='>')
     132              pos++;
     133            pos = bitstream_scan(miscMarks,pos+1);
     134
     135            if(pos!=chars_avail){
     136              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
     137              exit(-1);
     138            }
     139          }
    148140        }
    149141//      start tag
    150     else if(this->srcbuf[pos-1]=='<'){
    151       this->att_index = 0;
    152       if(this->depth<MAX_DEPTH){
    153         int end_pos = bitstream_scan(this->NameFollows,pos);
    154         this->tag_lgth_stack[this->depth] = end_pos-pos;
    155         this->tag_stack[depth] = &this->srcbuf[pos];
    156         if(end_pos<BUF_SIZE){
    157          this->depth++;
     142        else if(srcbuf[pos-1]=='<'){
     143          att_index = 0;
     144          if(depth<MAX_DEPTH){
     145            int end_pos = bitstream_scan(NameFollows,pos);
     146            tag_lgth_stack[depth] = end_pos-pos;
     147            tag_stack[depth] = &srcbuf[pos];
     148            if(end_pos<BUFFER_SIZE){
     149              depth++;
    158150            }
    159151            else{
    160           this->state = InStartTag;
    161           this->finalStartPos = pos;
     152              state = InStartTag;
     153              finalStartPos = pos;
    162154            }
    163155          }
    164156          else{
    165         fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",this->buf_base+pos, this->depth);
     157            fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",buf_base+pos, depth);
    166158            exit(-1);
    167159          }
     
    169161//      attribute
    170162        else{
    171       int end_pos = bitstream_scan(this->NameFollows,pos);
    172       if(end_pos<BUF_SIZE){
    173         if(lookup_or_insert(&this->srcbuf[pos], end_pos-pos)){
    174           fprintf(stderr,"Attribute name is not unique at position =%i.\n",this->buf_base+pos);
     163          int end_pos = bitstream_scan(NameFollows,pos);
     164          if(end_pos<BUFFER_SIZE){
     165            if(lookup_or_insert(&srcbuf[pos], end_pos-pos)){
     166              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base+pos);
    175167              exit(-1);
    176168            }
    177169          }
    178170          else{
    179         this->state = InAttName;
    180         this->InAtt.start = &this->srcbuf[pos];
    181         this->InAtt.lgth = BUF_SIZE-pos;
     171            state = InAttName;
     172            InAtt.start = &srcbuf[pos];
     173            InAtt.lgth = BUFFER_SIZE-pos;
    182174          }
    183175        }
     
    185177}
    186178
    187 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    188 int TagMatcher<BUF_SIZE, OVER_SIZE>::StreamScan(int chars_avail) {
     179
     180int TagMatcher::StreamScan(int chars_avail) {
    189181
    190182        int blk;
     
    192184        int block_pos = 0;
    193185
    194     if(this->mode == StartOfFile){
     186        if(mode == StartOfFile){
    195187          int pos = bitstream_scan(miscMarks,0);
    196188          if (pos==chars_avail){
    197         fprintf(stderr,"no element at position =%i.\n",this->buf_base+pos);
     189            fprintf(stderr,"no element at position =%i.\n",buf_base+pos);
    198190            exit(-1);
    199191          }
    200       if(this->srcbuf[pos-1]!='<'|| this->srcbuf[pos]=='!'|| this->srcbuf[pos]=='/'){
     192          if(srcbuf[pos-1]!='<'|| srcbuf[pos]=='!'||srcbuf[pos]=='/'){
    201193#ifdef DUMP
    202194print_register<BitBlock>("srcbuf", bitblock::load_unaligned((BitBlock *) srcbuf));
     
    205197            exit(-1);
    206198          }
    207       this->mode = InFile;
     199          mode = InFile;
    208200        }
    209201        for (blk = 0; blk < blk_counts; blk++) {
    210         scanword_t s = ((scanword_t*)this->tagMarks)[blk];
     202                scanword_t s = ((scanword_t*)tagMarks)[blk];
    211203                while(s) {
    212204                        int code = tag_match(scan_forward_zeroes(s) + block_pos, chars_avail);
     
    220212}
    221213
    222 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    223 void TagMatcher<BUF_SIZE, OVER_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
     214void TagMatcher::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
    224215#ifdef DUMP
    225216print_register<BitBlock>("tagMark", tagMark);
     
    229220printf("stream_index = %i\n", stream_index);
    230221#endif
    231   this->tagMarks[this->stream_index] = tagMark;
    232   this->miscMarks[this->stream_index] = simd_not(miscMark);
    233   this->NameFollows[this->stream_index] = NameFollow;
    234   this->stream_index++;
    235   if(this->stream_index==1){
    236 
    237     if (this->InFinalEndTag == 1){
     222  tagMarks[stream_index] = tagMark;
     223  miscMarks[stream_index] = simd_not(miscMark);
     224  NameFollows[stream_index] = NameFollow;
     225  stream_index++;
     226  if(stream_index==1){
     227
     228    if (InFinalEndTag == 1){
    238229      int pos = -1;
    239       while(this->srcbuf[pos]!='>'){
     230      while(srcbuf[pos]!='>'){
    240231        pos++;
    241232        if(pos>=chars_avail){
    242       this->InFinalEndTag = 1;
     233          InFinalEndTag = 1;
    243234          return;
    244235        }
    245236      }
    246       pos = bitstream_scan(this->miscMarks,pos+1);
     237      pos = bitstream_scan(miscMarks,pos+1);
    247238#ifdef DUMP
    248239print_register<BitBlock>("miscMarks[0]", miscMarks[0]);
     
    255246    }
    256247
    257     if(this->state == InStartTag) {
    258       this->state = this->Clear;
    259       int remain_lgth = bitstream_scan(this->NameFollows,0);
    260       memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
    261       this->tag_lgth_stack[this->depth] += remain_lgth;
    262       this->depth++;
    263     }
    264     else if (this->state == InEndTag) {
    265       this->state = Clear;
    266       int lgth = this->tag_lgth_stack[this->depth];
    267       if (does_match(this->tag_stack[this->depth]+this->inTagPos,this->srcbuf,lgth-this->inTagPos) && ((this->srcbuf[lgth-this->inTagPos] == '>') ||(this->srcbuf[lgth-this->inTagPos] <= ' '))) return ;
     248    if(state == InStartTag) {
     249      state = Clear;
     250      int remain_lgth = bitstream_scan(NameFollows,0);
     251      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
     252      tag_lgth_stack[depth] += remain_lgth;
     253      depth++;
     254    }
     255    else if (state == InEndTag) {
     256      state = Clear;
     257      int lgth = tag_lgth_stack[depth];
     258      if (does_match(tag_stack[depth]+inTagPos,srcbuf,lgth-inTagPos) && ((srcbuf[lgth-inTagPos] == '>') ||(srcbuf[lgth-inTagPos] <= ' '))) return ;
    268259      else {
    269260          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base);
     
    271262      }
    272263    }
    273     else if (this->state == InAttName) {
    274       this->state = Clear;
    275       int remain_lgth = bitstream_scan(this->NameFollows,0);
    276       memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
    277       if(lookup_or_insert(this->InAtt.start, this->InAtt.lgth+remain_lgth)){
     264    else if (state == InAttName) {
     265      state = Clear;
     266      int remain_lgth = bitstream_scan(NameFollows,0);
     267      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
     268      if(lookup_or_insert(InAtt.start, InAtt.lgth+remain_lgth)){
    278269              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base);
    279270              exit(-1);
     
    283274}
    284275
    285 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    286 TagMatcher<BUF_SIZE, OVER_SIZE>::TagMatcher(){
    287   this->stream_index = 0;
    288   this->depth = 0;
    289   this->buf_base = 0;
    290   this->state = Clear;
    291   this->mode = StartOfFile;
    292   this->InFinalEndTag = 0;
    293   this->NameFollows[BUF_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
    294 }
    295 
    296 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    297 TagMatcher<BUF_SIZE, OVER_SIZE>::~TagMatcher(){
    298 
    299 }
    300 
    301 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    302 void TagMatcher<BUF_SIZE, OVER_SIZE>::setSrc(char * src){
    303   this->srcbuf = src;
    304 }
    305 
    306 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    307 void TagMatcher<BUF_SIZE, OVER_SIZE>::Advance_buffer(){
    308   this->buf_base += BUF_SIZE;
    309   this->stream_index=0;
    310   this->tags_buf_cur = 0;
    311   this->att_index = 0;
    312   for(int i=0; i< this->depth; i++){
    313     if(&this->tags_buf[this->tags_buf_cur]!=this->tag_stack[i])
    314       memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[i],this->tag_lgth_stack[i]);
    315     this->tag_stack[i] = &this->tags_buf[tags_buf_cur];
    316     this->tags_buf_cur += this->tag_lgth_stack[i];
    317   }
    318   if(this->state == InStartTag) {
    319       memcpy(&this->tags_buf[this->tags_buf_cur],&this->srcbuf[this->finalStartPos],this->tag_lgth_stack[this->depth]);
    320       this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
    321       this->tags_buf_cur += this->tag_lgth_stack[this->depth];
    322   }
    323   else if(this->state == InEndTag) {
    324      memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[this->depth],this->tag_lgth_stack[this->depth]);
    325     this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
    326     this->tags_buf_cur += this->tag_lgth_stack[this->depth];
    327   }
    328   else if(this->state == InAttName) {
    329       memcpy(&this->tags_buf[this->tags_buf_cur],this->InAtt.start,this->InAtt.lgth);
    330       this->InAtt.start = &this->tags_buf[tags_buf_cur];
    331       this->tags_buf_cur += this->InAtt.lgth;
    332   }
    333   this->srcbuf[-1] = this->srcbuf[BUF_SIZE-1];
    334 }
    335 
     276TagMatcher::TagMatcher(){
     277  stream_index = 0;
     278  depth = 0;
     279  buf_base = 0;
     280  state = Clear;
     281  mode = StartOfFile;
     282  InFinalEndTag = 0;
     283  NameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
     284}
     285
     286
     287
     288TagMatcher::~TagMatcher(){
     289
     290}
     291
     292void TagMatcher::setSrc(char * src){
     293  srcbuf = src;
     294}
     295
     296void TagMatcher::Advance_buffer(){
     297  buf_base += BUFFER_SIZE;
     298  stream_index=0;
     299  tags_buf_cur = 0;
     300  att_index = 0;
     301  for(int i=0; i< depth; i++){
     302    if(&tags_buf[tags_buf_cur]!=tag_stack[i])
     303      memcpy(&tags_buf[tags_buf_cur],tag_stack[i],tag_lgth_stack[i]);
     304    tag_stack[i] = &tags_buf[tags_buf_cur];
     305    tags_buf_cur += tag_lgth_stack[i];
     306  }
     307  if(state == InStartTag) {
     308      memcpy(&tags_buf[tags_buf_cur],&srcbuf[finalStartPos],tag_lgth_stack[depth]);
     309      tag_stack[depth] = &tags_buf[tags_buf_cur];
     310      tags_buf_cur += tag_lgth_stack[depth];
     311  }
     312  else if(state == InEndTag) {
     313     memcpy(&tags_buf[tags_buf_cur],tag_stack[depth],tag_lgth_stack[depth]);
     314    tag_stack[depth] = &tags_buf[tags_buf_cur];
     315    tags_buf_cur += tag_lgth_stack[depth];
     316  }
     317  else if(state == InAttName) {
     318      memcpy(&tags_buf[tags_buf_cur],InAtt.start,InAtt.lgth);
     319      InAtt.start = &tags_buf[tags_buf_cur];
     320      tags_buf_cur += InAtt.lgth;
     321  }
     322  srcbuf[-1] = srcbuf[BUFFER_SIZE-1];
     323}
    336324
    337325#endif /* TAGMATCHER_HPP_ */
Note: See TracChangeset for help on using the changeset viewer.