Changeset 2147 for proto


Ignore:
Timestamp:
May 24, 2012, 4:35:39 PM (7 years ago)
Author:
ksherdy
Message:

Template TagMatcher.hpp to rid the class of global variable dependencies.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/src/TagMatcher.hpp

    r2126 r2147  
    44#include "../lib/bitblock.hpp"
    55
    6 #define MAX_DEPTH 100
    76#include <algorithm>
    87#include <iostream>
     
    109using namespace std;
    1110
    12 
    1311#define MAX_DEPTH 100
    1412#define MAX_ATTS 100
     
    1917};
    2018
     19template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    2120class TagMatcher {
    22   public:
    23   BitBlock tagMarks[BUFFER_SIZE/BLOCK_SIZE];
    24   BitBlock miscMarks[BUFFER_SIZE/BLOCK_SIZE];
    25   char tags_buf[BUFFER_SIZE];
    26   int tags_buf_cur;
    27   int stream_index;
    28   char * srcbuf;
    29   int depth;
    30   int inTagPos;
    31   int finalStartPos;
    32   char* tag_stack[MAX_DEPTH];
    33   int tag_lgth_stack[MAX_DEPTH];
    34   BitBlock NameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
    35   int buf_base;
    36   enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
    37   enum TagMatchMode {StartOfFile, InFile} mode;
    38   struct attribute Attr[MAX_ATTS];
    39   struct attribute InAtt;
    40   int att_index;
    41   int InFinalEndTag;
    42 
     21
     22public:
    4323  TagMatcher();
    4424  ~TagMatcher();
     
    5030  int does_match(char * s1, char * s2, int lgth);
    5131  int lookup_or_insert(char*s, int lgth);
     32
     33    int depth;
     34
     35private:
     36  BitBlock tagMarks[BUF_SIZE/BLOCK_SIZE];
     37  BitBlock miscMarks[BUF_SIZE/BLOCK_SIZE];
     38  char tags_buf[BUF_SIZE];
     39  int tags_buf_cur;
     40  int stream_index;
     41  char * srcbuf;
     42
     43  int inTagPos;
     44  int finalStartPos;
     45  char* tag_stack[MAX_DEPTH];
     46  int tag_lgth_stack[MAX_DEPTH];
     47  BitBlock NameFollows[BUF_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
     48  int buf_base;
     49  enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
     50  enum TagMatchMode {StartOfFile, InFile} mode;
     51  struct attribute Attr[MAX_ATTS];
     52  struct attribute InAtt;
     53  int att_index;
     54  int InFinalEndTag;
     55
    5256};
    5357
    54 int TagMatcher::lookup_or_insert(char* s, int lgth){
    55   for(int i=0; i< att_index; i++)
    56     if(lgth == Attr[i].lgth &&  does_match(s,Attr[i].start,lgth))
    57       return 1;
    58 
    59   Attr[att_index].start = s;
    60   Attr[att_index].lgth = lgth;
    61   att_index++;
    62   return 0;
    63 }
    64 
    65 int TagMatcher::does_match(char * s1, char * s2, int lgth){
     58template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     59int TagMatcher<BUF_SIZE, OVER_SIZE>::does_match(char * s1, char * s2, int lgth){
    6660    int matchlen = 0;
    6761    int i=0;
     
    6963      /* full 16 byte match */
    7064
    71       if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]), 
    72                                 bitblock::load_unaligned((BitBlock*)&s2[i])))) {
    73                 return 0;
     65      if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]),
     66                bitblock::load_unaligned((BitBlock*)&s2[i])))) {
     67        return 0;
    7468      }
    7569      else {
    76                 lgth -= sizeof(BitBlock);
    77                 i +=sizeof(BitBlock);
    78       }
    79     }
    80 
    81         scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
     70        lgth -= sizeof(BitBlock);
     71        i +=sizeof(BitBlock);
     72      }
     73    }
     74
     75    scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
    8276                                                  bitblock::load_unaligned((BitBlock*)&s2[i])));
    8377
    84         return lgth <= scan_forward_zeroes(temp);
    85 }
    86 
    87 
    88 int TagMatcher:: tag_match(int pos, int chars_avail) {
     78    return lgth <= scan_forward_zeroes(temp);
     79}
     80
     81
     82template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     83int TagMatcher<BUF_SIZE, OVER_SIZE>::lookup_or_insert(char* s, int lgth){
     84  for(int i=0; i< this->att_index; i++)
     85    if(lgth == this->Attr[i].lgth &&  this->does_match(s,this->Attr[i].start,lgth))
     86      return 1;
     87
     88  this->Attr[att_index].start = s;
     89  this->Attr[att_index].lgth = lgth;
     90  this->att_index++;
     91  return 0;
     92}
     93
     94
     95template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     96int TagMatcher<BUF_SIZE, OVER_SIZE>:: tag_match(int pos, int chars_avail) {
    8997        int rt_val=0;
    9098//      end tag
    91         if(srcbuf[pos]=='/' ){
     99    if(this->srcbuf[pos]=='/' ){
    92100          pos++;
    93           depth--;
    94           if (depth<0)
     101      this->depth--;
     102      if (this->depth<0)
    95103            return pos;
    96           int lgth = tag_lgth_stack[depth];
    97 
    98           if (does_match(tag_stack[depth],&srcbuf[pos],lgth) && ((srcbuf[pos+lgth] == '>') ||(srcbuf[pos+lgth] <= ' '))) rt_val=0;
    99           else if (pos + lgth >= BUFFER_SIZE + OVERLAP_BUFSIZE) {
    100             state = InEndTag;
    101             inTagPos = BUFFER_SIZE - pos;
     104      int lgth = this->tag_lgth_stack[depth];
     105
     106      if (does_match(this->tag_stack[depth],&this->srcbuf[pos],lgth) && ((this->srcbuf[pos+lgth] == '>') ||(this->srcbuf[pos+lgth] <= ' '))) rt_val=0;
     107      else if (pos + lgth >= BUF_SIZE + OVER_SIZE) {
     108        this->state = InEndTag;
     109        this-> inTagPos = BUF_SIZE - pos;
    102110            rt_val=0;
    103111          }
     
    109117          }
    110118
    111           if (depth == 0){
    112             while(srcbuf[pos]!='>'){
     119      if (this->depth == 0){
     120        while(this->srcbuf[pos]!='>'){
    113121              pos++;
    114122              if(pos>=chars_avail){
    115                 InFinalEndTag = 1;
     123        this->InFinalEndTag = 1;
    116124                return 0;
    117125              }
    118126            }
    119             pos = bitstream_scan(miscMarks,pos+1);
     127        pos = bitstream_scan(this->miscMarks,pos+1);
     128            if(pos!=chars_avail){
     129          fprintf(stderr,"illegal content after root element at position = %i\n",this->buf_base+pos);
     130              exit(-1);
     131            }
     132          }
     133          return rt_val;
     134        }
     135//      empty tag
     136    else if(this->srcbuf[pos]=='>'){
     137      this->depth--;
     138      if (this->depth == 0){
     139        while(this->srcbuf[pos]!='>')
     140              pos++;
     141        pos = bitstream_scan(this->miscMarks,pos+1);
     142
    120143            if(pos!=chars_avail){
    121144              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
     
    123146            }
    124147          }
    125           return rt_val;
    126         }
    127 //      empty tag
    128         else if(srcbuf[pos]=='>'){
    129           depth--;
    130           if (depth == 0){
    131             while(srcbuf[pos]!='>')
    132               pos++;
    133             pos = bitstream_scan(miscMarks,pos+1);
    134 
    135             if(pos!=chars_avail){
    136               fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
    137               exit(-1);
    138             }
    139           }
    140148        }
    141149//      start tag
    142         else if(srcbuf[pos-1]=='<'){
    143           att_index = 0;
    144           if(depth<MAX_DEPTH){
    145             int end_pos = bitstream_scan(NameFollows,pos);
    146             tag_lgth_stack[depth] = end_pos-pos;
    147             tag_stack[depth] = &srcbuf[pos];
    148             if(end_pos<BUFFER_SIZE){
    149               depth++;
     150    else if(this->srcbuf[pos-1]=='<'){
     151      this->att_index = 0;
     152      if(this->depth<MAX_DEPTH){
     153        int end_pos = bitstream_scan(this->NameFollows,pos);
     154        this->tag_lgth_stack[this->depth] = end_pos-pos;
     155        this->tag_stack[depth] = &this->srcbuf[pos];
     156        if(end_pos<BUF_SIZE){
     157         this->depth++;
    150158            }
    151159            else{
    152               state = InStartTag;
    153               finalStartPos = pos;
     160          this->state = InStartTag;
     161          this->finalStartPos = pos;
    154162            }
    155163          }
    156164          else{
    157             fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",buf_base+pos, depth);
     165        fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",this->buf_base+pos, this->depth);
    158166            exit(-1);
    159167          }
     
    161169//      attribute
    162170        else{
    163           int end_pos = bitstream_scan(NameFollows,pos);
    164           if(end_pos<BUFFER_SIZE){
    165             if(lookup_or_insert(&srcbuf[pos], end_pos-pos)){
    166               fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base+pos);
     171      int end_pos = bitstream_scan(this->NameFollows,pos);
     172      if(end_pos<BUF_SIZE){
     173        if(lookup_or_insert(&this->srcbuf[pos], end_pos-pos)){
     174          fprintf(stderr,"Attribute name is not unique at position =%i.\n",this->buf_base+pos);
    167175              exit(-1);
    168176            }
    169177          }
    170178          else{
    171             state = InAttName;
    172             InAtt.start = &srcbuf[pos];
    173             InAtt.lgth = BUFFER_SIZE-pos;
     179        this->state = InAttName;
     180        this->InAtt.start = &this->srcbuf[pos];
     181        this->InAtt.lgth = BUF_SIZE-pos;
    174182          }
    175183        }
     
    177185}
    178186
    179 
    180 int TagMatcher::StreamScan(int chars_avail) {
     187template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     188int TagMatcher<BUF_SIZE, OVER_SIZE>::StreamScan(int chars_avail) {
    181189
    182190        int blk;
     
    184192        int block_pos = 0;
    185193
    186         if(mode == StartOfFile){
     194    if(this->mode == StartOfFile){
    187195          int pos = bitstream_scan(miscMarks,0);
    188196          if (pos==chars_avail){
    189             fprintf(stderr,"no element at position =%i.\n",buf_base+pos);
     197        fprintf(stderr,"no element at position =%i.\n",this->buf_base+pos);
    190198            exit(-1);
    191199          }
    192           if(srcbuf[pos-1]!='<'|| srcbuf[pos]=='!'||srcbuf[pos]=='/'){
     200      if(this->srcbuf[pos-1]!='<'|| this->srcbuf[pos]=='!'|| this->srcbuf[pos]=='/'){
    193201#ifdef DUMP
    194202print_register<BitBlock>("srcbuf", bitblock::load_unaligned((BitBlock *) srcbuf));
     
    197205            exit(-1);
    198206          }
    199           mode = InFile;
     207      this->mode = InFile;
    200208        }
    201209        for (blk = 0; blk < blk_counts; blk++) {
    202                 scanword_t s = ((scanword_t*)tagMarks)[blk];
     210        scanword_t s = ((scanword_t*)this->tagMarks)[blk];
    203211                while(s) {
    204212                        int code = tag_match(scan_forward_zeroes(s) + block_pos, chars_avail);
     
    212220}
    213221
    214 void TagMatcher::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
     222template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     223void TagMatcher<BUF_SIZE, OVER_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
    215224#ifdef DUMP
    216225print_register<BitBlock>("tagMark", tagMark);
     
    220229printf("stream_index = %i\n", stream_index);
    221230#endif
    222   tagMarks[stream_index] = tagMark;
    223   miscMarks[stream_index] = simd_not(miscMark);
    224   NameFollows[stream_index] = NameFollow;
    225   stream_index++;
    226   if(stream_index==1){
    227 
    228     if (InFinalEndTag == 1){
     231  this->tagMarks[this->stream_index] = tagMark;
     232  this->miscMarks[this->stream_index] = simd_not(miscMark);
     233  this->NameFollows[this->stream_index] = NameFollow;
     234  this->stream_index++;
     235  if(this->stream_index==1){
     236
     237    if (this->InFinalEndTag == 1){
    229238      int pos = -1;
    230       while(srcbuf[pos]!='>'){
     239      while(this->srcbuf[pos]!='>'){
    231240        pos++;
    232241        if(pos>=chars_avail){
    233           InFinalEndTag = 1;
     242      this->InFinalEndTag = 1;
    234243          return;
    235244        }
    236245      }
    237       pos = bitstream_scan(miscMarks,pos+1);
     246      pos = bitstream_scan(this->miscMarks,pos+1);
    238247#ifdef DUMP
    239248print_register<BitBlock>("miscMarks[0]", miscMarks[0]);
     
    246255    }
    247256
    248     if(state == InStartTag) {
    249       state = Clear;
    250       int remain_lgth = bitstream_scan(NameFollows,0);
    251       memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
    252       tag_lgth_stack[depth] += remain_lgth;
    253       depth++;
    254     }
    255     else if (state == InEndTag) {
    256       state = Clear;
    257       int lgth = tag_lgth_stack[depth];
    258       if (does_match(tag_stack[depth]+inTagPos,srcbuf,lgth-inTagPos) && ((srcbuf[lgth-inTagPos] == '>') ||(srcbuf[lgth-inTagPos] <= ' '))) return ;
     257    if(this->state == InStartTag) {
     258      this->state = this->Clear;
     259      int remain_lgth = bitstream_scan(this->NameFollows,0);
     260      memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
     261      this->tag_lgth_stack[this->depth] += remain_lgth;
     262      this->depth++;
     263    }
     264    else if (this->state == InEndTag) {
     265      this->state = Clear;
     266      int lgth = this->tag_lgth_stack[this->depth];
     267      if (does_match(this->tag_stack[this->depth]+this->inTagPos,this->srcbuf,lgth-this->inTagPos) && ((this->srcbuf[lgth-this->inTagPos] == '>') ||(this->srcbuf[lgth-this->inTagPos] <= ' '))) return ;
    259268      else {
    260269          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base);
     
    262271      }
    263272    }
    264     else if (state == InAttName) {
    265       state = Clear;
    266       int remain_lgth = bitstream_scan(NameFollows,0);
    267       memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
    268       if(lookup_or_insert(InAtt.start, InAtt.lgth+remain_lgth)){
     273    else if (this->state == InAttName) {
     274      this->state = Clear;
     275      int remain_lgth = bitstream_scan(this->NameFollows,0);
     276      memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
     277      if(lookup_or_insert(this->InAtt.start, this->InAtt.lgth+remain_lgth)){
    269278              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base);
    270279              exit(-1);
     
    274283}
    275284
    276 TagMatcher::TagMatcher(){
    277   stream_index = 0;
    278   depth = 0;
    279   buf_base = 0;
    280   state = Clear;
    281   mode = StartOfFile;
    282   InFinalEndTag = 0;
    283   NameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
    284 }
    285 
    286 
    287 
    288 TagMatcher::~TagMatcher(){
    289 
    290 }
    291 
    292 void TagMatcher::setSrc(char * src){
    293   srcbuf = src;
    294 }
    295 
    296 void TagMatcher::Advance_buffer(){
    297   buf_base += BUFFER_SIZE;
    298   stream_index=0;
    299   tags_buf_cur = 0;
    300   att_index = 0;
    301   for(int i=0; i< depth; i++){
    302     if(&tags_buf[tags_buf_cur]!=tag_stack[i])
    303       memcpy(&tags_buf[tags_buf_cur],tag_stack[i],tag_lgth_stack[i]);
    304     tag_stack[i] = &tags_buf[tags_buf_cur];
    305     tags_buf_cur += tag_lgth_stack[i];
    306   }
    307   if(state == InStartTag) {
    308       memcpy(&tags_buf[tags_buf_cur],&srcbuf[finalStartPos],tag_lgth_stack[depth]);
    309       tag_stack[depth] = &tags_buf[tags_buf_cur];
    310       tags_buf_cur += tag_lgth_stack[depth];
    311   }
    312   else if(state == InEndTag) {
    313      memcpy(&tags_buf[tags_buf_cur],tag_stack[depth],tag_lgth_stack[depth]);
    314     tag_stack[depth] = &tags_buf[tags_buf_cur];
    315     tags_buf_cur += tag_lgth_stack[depth];
    316   }
    317   else if(state == InAttName) {
    318       memcpy(&tags_buf[tags_buf_cur],InAtt.start,InAtt.lgth);
    319       InAtt.start = &tags_buf[tags_buf_cur];
    320       tags_buf_cur += InAtt.lgth;
    321   }
    322   srcbuf[-1] = srcbuf[BUFFER_SIZE-1];
    323 }
     285template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     286TagMatcher<BUF_SIZE, OVER_SIZE>::TagMatcher(){
     287  this->stream_index = 0;
     288  this->depth = 0;
     289  this->buf_base = 0;
     290  this->state = Clear;
     291  this->mode = StartOfFile;
     292  this->InFinalEndTag = 0;
     293  this->NameFollows[BUF_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
     294}
     295
     296template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     297TagMatcher<BUF_SIZE, OVER_SIZE>::~TagMatcher(){
     298
     299}
     300
     301template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     302void TagMatcher<BUF_SIZE, OVER_SIZE>::setSrc(char * src){
     303  this->srcbuf = src;
     304}
     305
     306template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     307void TagMatcher<BUF_SIZE, OVER_SIZE>::Advance_buffer(){
     308  this->buf_base += BUF_SIZE;
     309  this->stream_index=0;
     310  this->tags_buf_cur = 0;
     311  this->att_index = 0;
     312  for(int i=0; i< this->depth; i++){
     313    if(&this->tags_buf[this->tags_buf_cur]!=this->tag_stack[i])
     314      memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[i],this->tag_lgth_stack[i]);
     315    this->tag_stack[i] = &this->tags_buf[tags_buf_cur];
     316    this->tags_buf_cur += this->tag_lgth_stack[i];
     317  }
     318  if(this->state == InStartTag) {
     319      memcpy(&this->tags_buf[this->tags_buf_cur],&this->srcbuf[this->finalStartPos],this->tag_lgth_stack[this->depth]);
     320      this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
     321      this->tags_buf_cur += this->tag_lgth_stack[this->depth];
     322  }
     323  else if(this->state == InEndTag) {
     324     memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[this->depth],this->tag_lgth_stack[this->depth]);
     325    this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
     326    this->tags_buf_cur += this->tag_lgth_stack[this->depth];
     327  }
     328  else if(this->state == InAttName) {
     329      memcpy(&this->tags_buf[this->tags_buf_cur],this->InAtt.start,this->InAtt.lgth);
     330      this->InAtt.start = &this->tags_buf[tags_buf_cur];
     331      this->tags_buf_cur += this->InAtt.lgth;
     332  }
     333  this->srcbuf[-1] = this->srcbuf[BUF_SIZE-1];
     334}
     335
    324336
    325337#endif /* TAGMATCHER_HPP_ */
Note: See TracChangeset for help on using the changeset viewer.