Changeset 568


Ignore:
Timestamp:
Aug 10, 2010, 5:35:14 PM (9 years ago)
Author:
lindanl
Message:

tag matching for arbitrary length

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/src/tag_matcher.cpp

    r567 r568  
    11
    22#define MAX_DEPTH 100
     3#include <algorithm>
     4#include <iostream>
     5using namespace std;
    36
    47class tag_matcher {
     
    1316  int buf_base;
    1417  enum TagMatchState {InStartTag, InEndTag, Clear} state;
    15    
     18  int inTagpos;
     19  int rem_lgth;
     20  int new_depth;
    1621  tag_matcher(char * src);
    1722  ~tag_matcher(); 
     
    1924  void store_streams(SIMD_type tagMark, SIMD_type tagNameFollow);
    2025  int tag_match(int pos);
     26  int calc_match_len(SIMD_type * s, char * c, int lgth);
    2127  void Advance_buffer();
    2228};
    2329
     30int tag_matcher::calc_match_len(SIMD_type * s, char * c, int lgth){
           /* Compare the SIMD blocks s[0], s[1], ... against successive
            * sizeof(SIMD_type)-byte chunks of c and report how much matched.
            *
            *   s    - blocks holding the stored text to compare against
            *   c    - byte buffer compared against s; read with unaligned loads
            *   lgth - number of bytes the caller wants compared
            *
            * Returns -1 as soon as a whole block fails the simd_all_eq_8 test;
            * otherwise returns the bytes accounted for by the fully matched
            * blocks plus the cfzl() count obtained from the last block.
            *
            * The last block's count is not clamped to lgth, so the result can be
            * larger than lgth; the visible callers test `matchlen > lgth`
            * themselves.
            *
            * NOTE(review): presumably cfzl() counts the low-order zero bits, so
            * that applying it to the inverted byte-equality mask yields the
            * number of leading equal bytes - confirm against its definition.
            */
     31    int matchlen = 0;
     32    int i=0;
           /* NOTE(review): the threshold is the literal 16 while the stride is
            * sizeof(SIMD_type); this assumes sizeof(SIMD_type) == 16 - confirm. */
     33    while (lgth > 16) {
     34      /* full 16 byte match */
     35      if (simd_all_eq_8(s[i], sisd_load_unaligned((SIMD_type*)&c[i*sizeof(SIMD_type)]))) {
     36        lgth -= sizeof(SIMD_type);
     37        matchlen += sizeof(SIMD_type);
     38        i++;
     39      }
     40      else {
     41        return -1;
     42      }
     43    }
           /* Last block (16 or fewer bytes still requested): a full 16-byte
            * load and compare is still performed here, regardless of lgth. */
     44    matchlen += cfzl(~_mm_movemask_epi8(simd_eq_8(s[i], sisd_load_unaligned((SIMD_type*)&c[i*sizeof(SIMD_type)])))); 
     45    return matchlen;
     46}
     47
    2448int tag_matcher:: tag_match(int pos) {
     49        int lgth;
     50        int matchlen;
     51        int i;
     52        int to_match;
    2553//      printf("%c\n",srcbuf[pos]);
    2654        if(srcbuf[pos]=='/' ){
    2755          pos++;
    28           depth--;
     56          depth--;
    2957          if (depth<0)
    3058            return pos;
    31           int matchlen = cfzl(~_mm_movemask_epi8(simd_eq_8(tag_stack[depth], sisd_load_unaligned((SIMD_type*)&srcbuf[pos]))));
    32           if (matchlen > tag_lgth_stack[depth]) return 0;
    33           else if ((matchlen == tag_lgth_stack[depth]) && ((srcbuf[pos+matchlen] == '>') ||(srcbuf[pos+matchlen] <= ' '))) return 0;
     59          lgth = tag_lgth_stack[depth];
     60          to_match = min(lgth,BUFFER_SIZE - pos);
     61          depth -= ((lgth-1)/16);
     62          matchlen = calc_match_len(&tag_stack[depth], &srcbuf[pos], to_match);
     63          if (matchlen > lgth) return 0;
     64          else if ((matchlen == lgth) && ((srcbuf[pos+matchlen] == '>') ||(srcbuf[pos+matchlen] <= ' '))) return 0;
    3465          else if (pos + matchlen >= BUFFER_SIZE) {
    35             matchlen = BUFFER_SIZE - pos;
    36             tag_stack[depth] = sisd_srl(tag_stack[depth], sisd_from_int(matchlen*8));
    37             tag_lgth_stack[depth] -= matchlen;
     66            inTagpos = matchlen;
     67            rem_lgth = lgth-matchlen;
    3868            state = InEndTag;
    3969          }
     
    4373          }
    4474        }
    45         else if(srcbuf[pos]=='>'){
     75        else if(srcbuf[pos]=='>'){       
    4676          depth--;
    4777        }
    4878        else {
    49           if(depth<MAX_DEPTH){
    50             int end_pos = bitstream_scan(tagNameFollows,pos);
    51             tag_lgth_stack[depth] = end_pos-pos;
    52             tag_stack[depth] = sisd_load_unaligned((SIMD_type*)&srcbuf[pos]);
     79         
     80          int end_pos = bitstream_scan(tagNameFollows,pos);
     81          int lgth = end_pos-pos;
     82          new_depth = depth + (lgth+15)/16;
     83          if(new_depth<MAX_DEPTH){         
     84            int d = depth;
     85            lgth = min(lgth, BUFFER_SIZE - pos);
     86            for (i = 0; i < (lgth-1)/16; i++) {
     87              tag_stack[d] = sisd_load_unaligned((SIMD_type*)&srcbuf[pos+i*sizeof(SIMD_type)]);
     88              d++;
     89            }
     90
     91            tag_stack[d] = sisd_load_unaligned((SIMD_type*)&srcbuf[pos+i*sizeof(SIMD_type)]);   
     92           
    5393            if(end_pos<BUFFER_SIZE){
    54               depth++;
     94              depth = new_depth;
     95              tag_lgth_stack[depth-1] = lgth;
    5596            }
    5697            else{
    5798              state = InStartTag;
    58             }
     99              inTagpos = BUFFER_SIZE - pos;
     100            } 
    59101          }
    60102          else{
     
    87129
    88130void tag_matcher::store_streams(SIMD_type tagMark, SIMD_type tagNameFollow){
     131 
     132  SIMD_type tag_tmp_stack[MAX_DEPTH];
    89133  tagMarks[stream_index] = tagMark;
    90134  tagNameFollows[stream_index] = tagNameFollow;
    91   stream_index++; 
     135  stream_index++;
    92136  if(stream_index==1){
    93137    if(state == InStartTag) {     
    94138      state = Clear;
    95139      int remain_lgth = bitstream_scan(tagNameFollows,0);
    96       SIMD_type shft = sisd_from_int(8*tag_lgth_stack[depth]);
    97       tag_stack[depth] = simd_or(simd_andc(tag_stack[depth],sisd_sll(simd_const_1(1),shft)),
    98                                 sisd_sll(sisd_load_unaligned((SIMD_type*)srcbuf),shft));
    99       tag_lgth_stack[depth] += remain_lgth;
    100       depth++;
     140      int lgth = inTagpos + remain_lgth;
     141      new_depth = depth + (lgth+15)/16;
     142      char * tag_stack_ptr = (char *) &tag_stack[depth];
     143      memcpy((void*)&tag_stack_ptr[inTagpos],(void*)srcbuf,remain_lgth);
     144      depth = new_depth;
     145      tag_lgth_stack[depth-1] = lgth;
    101146    }
    102147    else if (state == InEndTag) {
    103       int matchlen = cfzl(~_mm_movemask_epi8(simd_eq_8(tag_stack[depth], sisd_load_unaligned((SIMD_type*)srcbuf))));
    104148     
     149      char * tag_stack_ptr = (char *) &tag_stack[depth];
     150      for(int i=0;i<=rem_lgth/16;i++)
     151        tag_tmp_stack[i] = sisd_load_unaligned((SIMD_type*)&tag_stack_ptr[inTagpos]);
     152      int matchlen = calc_match_len(tag_tmp_stack, srcbuf, rem_lgth);
    105153      state = Clear;
    106       if (matchlen > tag_lgth_stack[depth]) return ;
    107       else if ((matchlen == tag_lgth_stack[depth]) && ((srcbuf[matchlen] == '>') ||(srcbuf[matchlen] <= ' '))) return;
     154      if (matchlen > rem_lgth) return ;
     155      else if ((matchlen == rem_lgth) && ((srcbuf[matchlen] == '>') ||(srcbuf[matchlen] <= ' '))) return;
    108156      else {
    109157          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base);
Note: See TracChangeset for help on using the changeset viewer.