Changeset 579


Ignore:
Timestamp:
Aug 16, 2010, 4:04:31 PM (9 years ago)
Author:
lindanl
Message:

Tag stack now stores positions (pointers to the tag names in the buffer) instead of copies of the tag-name strings.

Location:
proto/parabix2
Files:
3 edited

Legend:

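Unmodified (the old and new line numbers both precede the text, run together, e.g. `407407`)
Added (only the new line number precedes the text)
Removed (only the old line number precedes the text)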
  • proto/parabix2/parabix2_compilable.py

    r573 r579  
    407407        #return (CT_callouts, callouts, refs, u16hi, u16lo, delmask, error, lex, u16delmask, EOF_mask)
    408408        tag_marks = callouts.EmptyTagMarks | LAngleFollow
    409         return (CT_callouts, callouts, refs, error, lex, EOF_mask, name_check, name_start_check, control, tag_marks, ElemNameFollows)
    410 
    411        
     409        return (CT_callouts, callouts, refs, error, lex, EOF_mask, name_check, name_start_check, control, tag_marks, ElemNameFollows, CD_starts)
     410
     411       
  • proto/parabix2/src/bytelex.h

    r527 r579  
    9494  return s8int64(x8data) ==
    9595         c8int64<C, '<', '!', '[', 'C', 'D', 'A', 'T', 'A'>::value;
     96}
     97
     98template<CodeUnit_Base C>
     99inline bool at_CDATA1(unsigned char x8data[]) {
     100  return s7int64(x8data) ==
     101         c7int64<C, '[', 'C', 'D', 'A', 'T', 'A', '['>::value;
    96102}
    97103
  • proto/parabix2/src/tag_matcher.cpp

    r569 r579  
    1 
    21#define MAX_DEPTH 100
    32#include <algorithm>
     
    54using namespace std;
    65
     6#define MAX_DEPTH 100
     7
    78class tag_matcher {
    89  public:
    910  SIMD_type tagMarks[BUFFER_SIZE/BLOCK_SIZE];
     11  char tags_buf[BUFFER_SIZE];
     12  int tags_buf_cur;
    1013  int stream_index;
    1114  char * srcbuf;
    1215  int depth;
    13   SIMD_type tag_stack[MAX_DEPTH];
     16  int inTagPos;
     17  int finalStartPos;
     18  char* tag_stack[MAX_DEPTH];
    1419  int tag_lgth_stack[MAX_DEPTH];
    1520  SIMD_type tagNameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
    1621  int buf_base;
    1722  enum TagMatchState {InStartTag, InEndTag, Clear} state;
    18   int inTagpos;
    19   int rem_lgth;
    20  
     23   
    2124  tag_matcher(char * src);
    2225  ~tag_matcher(); 
     
    2427  void store_streams(SIMD_type tagMark, SIMD_type tagNameFollow);
    2528  int tag_match(int pos);
    26   int calc_match_len(SIMD_type * s, char * c, int lgth);
    2729  void Advance_buffer();
     30  int calc_match_len(char * s1, char * s2, int lgth);
    2831};
    2932
    30 int tag_matcher::calc_match_len(SIMD_type * s, char * c, int lgth){
     33int tag_matcher::calc_match_len(char * s1, char * s2, int lgth){
    3134    int matchlen = 0;
    3235    int i=0;
    33     while (lgth > 16) {
     36    while (lgth > sizeof(SIMD_type)) {
    3437      /* full 16 byte match */
    35       if (simd_all_eq_8(s[i], sisd_load_unaligned((SIMD_type*)&c[i*sizeof(SIMD_type)]))) {
     38      if (simd_all_eq_8(sisd_load_unaligned((SIMD_type*)&s1[i]), sisd_load_unaligned((SIMD_type*)&s2[i]))) {
    3639        lgth -= sizeof(SIMD_type);
    3740        matchlen += sizeof(SIMD_type);
    38         i++;
     41        i +=sizeof(SIMD_type);
    3942      }
    4043      else {
     
    4245      }
    4346    }
    44     matchlen += cfzl(~_mm_movemask_epi8(simd_eq_8(s[i], sisd_load_unaligned((SIMD_type*)&c[i*sizeof(SIMD_type)])))); 
     47    matchlen += cfzl(~_mm_movemask_epi8(simd_eq_8(sisd_load_unaligned((SIMD_type*)&s1[i]), sisd_load_unaligned((SIMD_type*)&s2[i])))); 
    4548    return matchlen;
    4649}
    4750
    4851int tag_matcher:: tag_match(int pos) {
    49         int lgth;
    50         int matchlen;
    51         int i;
    52         int to_match;
    53         int new_depth;
    5452//      printf("%c\n",srcbuf[pos]);
    5553        if(srcbuf[pos]=='/' ){
    5654          pos++;
    57           depth--;
     55          depth--;
    5856          if (depth<0)
    5957            return pos;
    60           lgth = tag_lgth_stack[depth];
    61           to_match = min(lgth,BUFFER_SIZE - pos);
    62           depth -= ((lgth-1)/16);
    63           matchlen = calc_match_len(&tag_stack[depth], &srcbuf[pos], to_match);
    64           if (matchlen > lgth) return 0;
    65           else if ((matchlen == lgth) && ((srcbuf[pos+matchlen] == '>') ||(srcbuf[pos+matchlen] <= ' '))) return 0;
    66           else if (pos + matchlen >= BUFFER_SIZE) {
    67             inTagpos = matchlen;
    68             rem_lgth = lgth-matchlen;
     58          int matchlen = calc_match_len(tag_stack[depth],&srcbuf[pos],tag_lgth_stack[depth]);
     59          if (matchlen > tag_lgth_stack[depth]) return 0;
     60          else if ((matchlen == tag_lgth_stack[depth]) && ((srcbuf[pos+matchlen] == '>') ||(srcbuf[pos+matchlen] <= ' '))) return 0;
     61          else if (pos + matchlen >= BUFFER_SIZE + OVERLAP_BUFSIZE) {
    6962            state = InEndTag;
     63            inTagPos = BUFFER_SIZE - pos;
    7064          }
    7165          else {
     66//            cout << "matchlen = " << matchlen << endl;
     67//            cout << "end tag is " << string(&srcbuf[pos],tag_lgth_stack[depth]) << endl ;
     68//            cout << "start tag is " << string(tag_stack[depth],tag_lgth_stack[depth]) << endl ;
    7269              fprintf(stderr,"tag name mismatch at position = %i\n",buf_base+pos);
    7370              exit(-1);
    7471          }
    7572        }
    76         else if(srcbuf[pos]=='>'){       
     73        else if(srcbuf[pos]=='>'){
    7774          depth--;
    78           lgth = tag_lgth_stack[depth];
    79           depth -= ((lgth-1)/16);
    8075        }
    8176        else {
    82          
    83           int end_pos = bitstream_scan(tagNameFollows,pos);
    84           int lgth = end_pos-pos;
    85           new_depth = depth + (lgth+15)/16;
    86           if(new_depth<MAX_DEPTH){         
    87             int d = depth;
    88             lgth = min(lgth, BUFFER_SIZE - pos);
    89             for (i = 0; i < (lgth-1)/16; i++) {
    90               tag_stack[d] = sisd_load_unaligned((SIMD_type*)&srcbuf[pos+i*sizeof(SIMD_type)]);
    91               d++;
    92             }
    93 
    94             tag_stack[d] = sisd_load_unaligned((SIMD_type*)&srcbuf[pos+i*sizeof(SIMD_type)]);   
    95            
     77          if(depth<MAX_DEPTH){
     78            int end_pos = bitstream_scan(tagNameFollows,pos);
     79            tag_lgth_stack[depth] = end_pos-pos;
     80            tag_stack[depth] = &srcbuf[pos];
    9681            if(end_pos<BUFFER_SIZE){
    97               depth = new_depth;
    98               tag_lgth_stack[depth-1] = lgth;
     82              depth++;
    9983            }
    10084            else{
    10185              state = InStartTag;
    102               inTagpos = BUFFER_SIZE - pos;
    103             } 
     86              finalStartPos = pos;
     87            }
    10488          }
    10589          else{
     
    11599 
    116100        int blk;
    117         int blk_counts = (chars_avail-1)/(sizeof(ScanBlock)*8)+1;
     101        int blk_counts = (chars_avail+sizeof(ScanBlock)*8-1)/(sizeof(ScanBlock)*8);
    118102        int block_pos = 0;
    119103       
     
    132116
    133117void tag_matcher::store_streams(SIMD_type tagMark, SIMD_type tagNameFollow){
    134  
    135   SIMD_type tag_tmp_stack[MAX_DEPTH];
    136118  tagMarks[stream_index] = tagMark;
    137119  tagNameFollows[stream_index] = tagNameFollow;
    138   stream_index++;
     120  stream_index++; 
    139121  if(stream_index==1){
    140     if(state == InStartTag) {     
     122    if(state == InStartTag) {
    141123      state = Clear;
    142124      int remain_lgth = bitstream_scan(tagNameFollows,0);
    143       int lgth = inTagpos + remain_lgth;
    144 //       new_depth = depth + (lgth+15)/16;
    145       char * tag_stack_ptr = (char *) &tag_stack[depth];
    146       memcpy((void*)&tag_stack_ptr[inTagpos],(void*)srcbuf,remain_lgth);
    147       depth = depth + (lgth+15)/16;
    148       tag_lgth_stack[depth-1] = lgth;
     125      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
     126      tags_buf_cur += remain_lgth;
     127      tag_lgth_stack[depth] += remain_lgth;
     128      depth++;
    149129    }
    150130    else if (state == InEndTag) {
    151      
    152       char * tag_stack_ptr = (char *) &tag_stack[depth];
    153       for(int i=0;i<=rem_lgth/16;i++)
    154         tag_tmp_stack[i] = sisd_load_unaligned((SIMD_type*)&tag_stack_ptr[inTagpos]);
    155       int matchlen = calc_match_len(tag_tmp_stack, srcbuf, rem_lgth);
     131      int matchlen = calc_match_len(tag_stack[depth]+inTagPos,srcbuf,tag_lgth_stack[depth]-inTagPos);
    156132      state = Clear;
    157       if (matchlen > rem_lgth) return ;
    158       else if ((matchlen == rem_lgth) && ((srcbuf[matchlen] == '>') ||(srcbuf[matchlen] <= ' '))) return;
     133      if (matchlen > tag_lgth_stack[depth]) return ;
     134      else if ((matchlen == tag_lgth_stack[depth]) && ((srcbuf[matchlen] == '>') ||(srcbuf[matchlen] <= ' '))) return;
    159135      else {
    160136          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base);
     
    181157  buf_base += BUFFER_SIZE;
    182158  stream_index=0;
     159  tags_buf_cur = 0;
     160  for(int i=0; i< depth; i++){
     161    if(&tags_buf[tags_buf_cur]!=tag_stack[i])
     162      memcpy(&tags_buf[tags_buf_cur],tag_stack[i],tag_lgth_stack[i]);         
     163    tag_stack[i] = &tags_buf[tags_buf_cur];
     164    tags_buf_cur += tag_lgth_stack[i];
     165  }
     166  if(state == InStartTag) {
     167      memcpy(&tags_buf[tags_buf_cur],&srcbuf[finalStartPos],tag_lgth_stack[depth]);           
     168      tag_stack[depth] = &tags_buf[tags_buf_cur];
     169      tags_buf_cur += tag_lgth_stack[depth];
     170  }
    183171}
Note: See TracChangeset for help on using the changeset viewer.