Changeset 582 for proto/parabix2


Ignore:
Timestamp:
Aug 19, 2010, 11:14:14 AM (9 years ago)
Author:
lindanl
Message:

Add attribute-name uniqueness checking

Location:
proto/parabix2
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/parabix2_compilable.py

    r579 r582  
    406406
    407407        #return (CT_callouts, callouts, refs, u16hi, u16lo, delmask, error, lex, u16delmask, EOF_mask)
    408         tag_marks = callouts.EmptyTagMarks | LAngleFollow
    409         return (CT_callouts, callouts, refs, error, lex, EOF_mask, name_check, name_start_check, control, tag_marks, ElemNameFollows, CD_starts)
    410 
    411        
     408        tag_marks = callouts.EmptyTagMarks | LAngleFollow | AttNameStarts
     409        NameFollows = ElemNameFollows | AttNameFollows
     410        return (CT_callouts, callouts, refs, error, lex, EOF_mask, name_check, name_start_check, control, tag_marks, NameFollows, CD_starts)
     411
     412       
  • proto/parabix2/src/tag_matcher.cpp

    r580 r582  
    55
    66#define MAX_DEPTH 100  // capacity of tag_stack / tag_lgth_stack; tag_match prints an error and exits instead of pushing beyond it
     7#define MAX_ATTS 100  // capacity of the attribute-name table Attr[] in tag_matcher
     8
     9struct attribute{  // one attribute name, kept as a (pointer, length) pair
     10  char * start;  // first character of the name; set to an address inside srcbuf or tags_buf
     11  int lgth;  // number of characters in the name
     12};
    713
    814class tag_matcher {
     
    1824  char* tag_stack[MAX_DEPTH];
    1925  int tag_lgth_stack[MAX_DEPTH];
    20   SIMD_type tagNameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
     26  SIMD_type NameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
    2127  int buf_base;
    22   enum TagMatchState {InStartTag, InEndTag, Clear} state;
     28  enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
     29  struct attribute Attr[MAX_ATTS];
     30  struct attribute InAtt;
     31  int att_index;
    2332   
    2433  tag_matcher(char * src);
    2534  ~tag_matcher(); 
    2635  int StreamScan(int chars_avail);
    27   void store_streams(SIMD_type tagMark, SIMD_type tagNameFollow);
     36  void store_streams(SIMD_type tagMark, SIMD_type NameFollow);
    2837  int tag_match(int pos);
    2938  void Advance_buffer();
    3039  int does_match(char * s1, char * s2, int lgth);
     40  int lookup_or_insert(char*s, int lgth);
    3141};
     42
     43int tag_matcher::lookup_or_insert(char* s, int lgth){  // returns 1 if the name s[0..lgth) is already recorded in Attr[]; otherwise records it and returns 0
     44  for(int i=0; i< att_index; i++)  // linear scan over the names recorded since att_index was last reset to 0
     45    if(lgth == Attr[i].lgth &&  does_match(s,Attr[i].start,lgth))  // length test first, so does_match is only called on equal-length names
     46      return 1;  // duplicate found; the callers print the error and exit
     47 
     48  Attr[att_index].start = s;  // NOTE(review): no check that att_index < MAX_ATTS here -- more than MAX_ATTS distinct names would write past Attr[]; confirm and add a guard
     49  Attr[att_index].lgth = lgth;  // only the pointer is stored (no copy), so s must remain valid while this entry is in use
     50  att_index++;
     51  return 0;
     52}
    3253
    3354int tag_matcher::does_match(char * s1, char * s2, int lgth){
     
    5273
    5374int tag_matcher:: tag_match(int pos) {
    54 //      printf("%c\n",srcbuf[pos]);
     75//      end tag
    5576        if(srcbuf[pos]=='/' ){
    5677          pos++;
     
    7394          }
    7495        }
     96//      empty tag
    7597        else if(srcbuf[pos]=='>'){
    7698          depth--;
    7799        }
    78         else {
     100//      start tag
     101        else if(srcbuf[pos-1]=='<'){
     102          att_index = 0;
    79103          if(depth<MAX_DEPTH){
    80             int end_pos = bitstream_scan(tagNameFollows,pos);
     104            int end_pos = bitstream_scan(NameFollows,pos);
    81105            tag_lgth_stack[depth] = end_pos-pos;
    82106            tag_stack[depth] = &srcbuf[pos];
     
    92116            fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",buf_base+pos, depth);
    93117            exit(-1);
     118          }
     119        }
     120//      attribute
     121        else{
     122          int end_pos = bitstream_scan(NameFollows,pos);
     123          if(end_pos<BUFFER_SIZE){
     124            if(lookup_or_insert(&srcbuf[pos], end_pos-pos)){
     125              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base+pos);
     126              exit(-1);
     127            }
     128          }
     129          else{
     130            state = InAttName;     
     131            InAtt.start = &srcbuf[pos];
     132            InAtt.lgth = BUFFER_SIZE-pos;
    94133          }
    95134        }
     
    117156}
    118157
    119 void tag_matcher::store_streams(SIMD_type tagMark, SIMD_type tagNameFollow){
     158void tag_matcher::store_streams(SIMD_type tagMark, SIMD_type NameFollow){
    120159  tagMarks[stream_index] = tagMark;
    121   tagNameFollows[stream_index] = tagNameFollow;
     160  NameFollows[stream_index] = NameFollow;
    122161  stream_index++; 
    123162  if(stream_index==1){
    124163    if(state == InStartTag) {
    125164      state = Clear;
    126       int remain_lgth = bitstream_scan(tagNameFollows,0);
     165      int remain_lgth = bitstream_scan(NameFollows,0);
    127166      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
    128       tags_buf_cur += remain_lgth;
     167//       tags_buf_cur += remain_lgth;
    129168      tag_lgth_stack[depth] += remain_lgth;
    130169      depth++;
     
    139178      }     
    140179    }
     180    else if (state == InAttName) {
     181      state = Clear;
     182      int remain_lgth = bitstream_scan(NameFollows,0);
     183      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
     184//       tags_buf_cur += remain_lgth;       
     185      if(lookup_or_insert(InAtt.start, InAtt.lgth+remain_lgth)){
     186              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base);
     187              exit(-1);
     188      }
     189    }
    141190  } 
    142191}
     
    148197  buf_base = 0;
    149198  state = Clear;
    150   tagNameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd_const_1(1);  //sentinel
     199  NameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd_const_1(1);  //sentinel
    151200}
    152201
     
    159208  stream_index=0;
    160209  tags_buf_cur = 0;
     210  att_index = 0;
    161211  for(int i=0; i< depth; i++){
    162212    if(&tags_buf[tags_buf_cur]!=tag_stack[i])
     
    170220      tags_buf_cur += tag_lgth_stack[depth];
    171221  }
    172 }
     222  if(state == InAttName) {
     223      memcpy(&tags_buf[tags_buf_cur],InAtt.start,InAtt.lgth);
     224      InAtt.start = &tags_buf[tags_buf_cur];
     225      tags_buf_cur += InAtt.lgth;
     226  }
     227  srcbuf[-1] = srcbuf[BUFFER_SIZE-1];
     228}
     229
  • proto/parabix2/template_tag_match.c

    r577 r582  
    248248  int check_pos = 0;
    249249  int chars_read = 0;
    250   char srcbuf[BUFFER_SIZE+BLOCK_SIZE];
     250  char buf[BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2];
    251251 
     252  char * srcbuf = buf + OVERLAP_BUFSIZE;
    252253  buffer_base = buf_pos;
    253254  source = srcbuf;
     
    287288
    288289    if(chars_avail < BUFFER_SIZE){
    289    
     290     
    290291      while (block_pos < chars_avail){
    291292
     
    340341        }
    341342       
    342         t.store_streams(tag_marks, ElemNameFollows);
     343        t.store_streams(tag_marks, NameFollows);
    343344       
    344345        block_pos += BLOCK_SIZE;
     
    384385        }
    385386       
    386         t.store_streams(tag_marks, ElemNameFollows);
     387        t.store_streams(tag_marks, NameFollows);
    387388
    388389        block_pos += BLOCK_SIZE;
Note: See TracChangeset for help on using the changeset viewer.