Changeset 2170


Ignore:
Timestamp:
May 28, 2012, 12:44:44 PM (7 years ago)
Author:
ksherdy
Message:

Refactored TagMatcher and pablo template.

Location:
proto/parabix2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/pablo_template.cpp

    r2166 r2170  
    4545#define PADDING_BLOCKS 1
    4646#define PADDING_SIZE (BLOCK_SIZE * PADDING_BLOCKS)
     47#define COPYBACK_BLOCKS 3
     48#define COPYBACK_SIZE (BLOCK_SIZE * COPYBACK_BLOCKS)
     49#define LOOKAHEAD_BLOCKS 1
     50#define LOOKAHEAD_SIZE (BLOCK_SIZE * LOOKAHEAD_BLOCKS)
    4751#define SEGMENT_BLOCKS  12
    4852#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    49 #define BUFFER_SIZE (SEGMENT_SIZE + PADDING_SIZE)
    50 #define OVERLAP_BUFSIZE PADDING_SIZE //sizeof(BitBlock)
     53#define BUFFER_SIZE (COPYBACK_SIZE + SEGMENT_SIZE + LOOKAHEAD_SIZE + PADDING_SIZE)
     54
     55#define OVERLAP_BUFSIZE PADDING_SIZE
    5156
    5257//////////////////////////////////////////////////////////////////////////////////////////
     
    117122    uint8_t * src_buf;
    118123    int block_base=0;
     124        int block_pos = 0;
    119125    int buffer_base=0;
    120126    int buffer_pos = 0;
    121     int block_pos = 0;
    122127    int chars_avail = 0;
    123     int check_pos = 0;
    124     int chars_read = 0;
     128        int chars_read = 0;
     129        int check_pos = 0;
    125130
    126131    //////////////////////////////////////////////////////////////////////////////////////////
    127132    // Buffer Management
    128133    //////////////////////////////////////////////////////////////////////////////////////////
    129     BitBlock buf[(PADDING_SIZE + SEGMENT_SIZE + PADDING_SIZE)/sizeof(BitBlock)];
    130     src_buf = (uint8_t *)buf + PADDING_SIZE;
    131 
    132     buffer_base = buffer_pos;
    133     chars_read = fread((void *)src_buf, 1, BUFFER_SIZE, infile);
    134     chars_avail = chars_read;
    135     if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     134        BitBlock buf[(BUFFER_SIZE)/sizeof(BitBlock)];
     135        src_buf = (uint8_t *)buf + COPYBACK_SIZE;
    136136
    137137    //////////////////////////////////////////////////////////////////////////////////////////
    138138    // XML Validation / Content Model
    139139    //////////////////////////////////////////////////////////////////////////////////////////
     140        chars_read = fread((void *)src_buf, 1, SEGMENT_SIZE, infile);
     141        chars_avail = chars_read;
     142        if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     143
    140144    if(chars_read<4){
    141     fprintf(stderr,"File is too short. Not well formed.\n");
    142     exit(-1);
     145                fprintf(stderr,"File is too short. Not well formed.\n");
     146                exit(-1);
    143147    }
    144148
     
    148152    if (e->code_unit_base == ASCII) {
    149153
    150     XML_Decl_Parser<ASCII> decl_parser((unsigned char *)src_buf);
    151 
    152     decl_parser.ReadXMLInfo(*e);
    153 
    154     if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
    155         fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
    156         exit(-1);
    157     }
     154                XML_Decl_Parser<ASCII> decl_parser((unsigned char *)src_buf);
     155
     156                decl_parser.ReadXMLInfo(*e);
     157
     158                if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
     159                        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
     160                        exit(-1);
     161                }
    158162    }
    159163    else {
     
    163167
    164168    if (e->content_start != 0) {
    165     memmove(&src_buf[0], &src_buf[e->content_start], chars_read - e->content_start);
    166     buffer_pos = e->content_start;
    167     if (chars_avail == SEGMENT_SIZE) {
    168         chars_read = chars_read - e->content_start + fread(&src_buf[chars_read-e->content_start], 1, e->content_start, infile);
    169         chars_avail = chars_read;
    170         if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    171     }
    172     else {
    173       chars_read -=e->content_start;
    174       chars_avail -=e->content_start;
    175     }
    176     }
     169
     170                memmove(&src_buf[0], &src_buf[e->content_start], chars_avail - e->content_start);
     171                buffer_pos = e->content_start;
     172                if ((chars_avail-e->content_start) < SEGMENT_SIZE) {
     173                        // TODO - Overlap buffer size.
     174                        chars_read = chars_avail - e->content_start + fread(&src_buf[chars_avail-e->content_start], 1, e->content_start, infile);
     175                        chars_avail = chars_read;
     176                }
     177                if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     178    }
     179
     180        //cout << chars_read << endl;
     181        //cout << chars_avail << endl;
     182        //exit(-1);
    177183
    178184    @stream_stmts
    179185
    180 
    181186    //////////////////////////////////////////////////////////////////////////////////////////
    182187    // Full Segments
    183188    //////////////////////////////////////////////////////////////////////////////////////////
    184189    matcher.setSrc((char *)src_buf);
    185     while (chars_avail == SEGMENT_SIZE) {
     190        while (chars_avail >= SEGMENT_SIZE) {
    186191      PERF_SEC_START(parser_timer);
    187192      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
     
    192197
    193198          tracker.StoreNewlines(lex.LF);
    194           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
     199                  postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail+2, tracker);
    195200          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    196201          tracker.AdvanceBlock();
     
    200205      PERF_SEC_END(parser_timer, chars_avail);
    201206
    202       int bytes_left = chars_read - chars_avail;
    203       memmove(src_buf, &src_buf[SEGMENT_SIZE], bytes_left);
    204       chars_read = fread(&src_buf[bytes_left], 1, BUFFER_SIZE - bytes_left, infile) + bytes_left;
     207          int bytes_left = chars_read - chars_avail;
     208
     209          memmove(src_buf, &src_buf[SEGMENT_SIZE], bytes_left);
     210
     211          // TODO - Look at this calculation. Define a segment size that incorporates OVERLAP_BUFSIZE.
     212
     213          chars_read = fread(&src_buf[bytes_left], 1, SEGMENT_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
    205214      chars_avail = chars_read;
    206       if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     215          if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    207216      buffer_pos += chars_avail;
    208217      buffer_base = buffer_pos;
     
    224233          tracker.StoreNewlines(lex.LF);
    225234          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
    226           matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     235                  matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    227236          tracker.AdvanceBlock();
    228237          block_pos += BLOCK_SIZE;
  • proto/parabix2/src/TagMatcher.hpp

    r2160 r2170  
    1717};
    1818
    19 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
     19template <uint64_t BUF_SIZE, uint64_t LA_SIZE> // buffer size, look ahead size
    2020class TagMatcher {
    2121
     
    3030  int does_match(char * s1, char * s2, int lgth);
    3131  int lookup_or_insert(char*s, int lgth);
    32 
    33     int depth;
     32  int depth;
    3433
    3534private:
     
    5655};
    5756
    58 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    59 int TagMatcher<BUF_SIZE, OVER_SIZE>::does_match(char * s1, char * s2, int lgth){
     57template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     58int TagMatcher<BUF_SIZE, LA_SIZE>::does_match(char * s1, char * s2, int lgth){
    6059    int matchlen = 0;
    6160    int i=0;
     
    8079
    8180
    82 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    83 int TagMatcher<BUF_SIZE, OVER_SIZE>::lookup_or_insert(char* s, int lgth){
     81template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     82int TagMatcher<BUF_SIZE, LA_SIZE>::lookup_or_insert(char* s, int lgth){
    8483  for(int i=0; i< this->att_index; i++)
    8584    if(lgth == this->Attr[i].lgth &&  this->does_match(s,this->Attr[i].start,lgth))
     
    9392
    9493
    95 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    96 int TagMatcher<BUF_SIZE, OVER_SIZE>:: tag_match(int pos, int chars_avail) {
     94template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     95int TagMatcher<BUF_SIZE, LA_SIZE>:: tag_match(int pos, int chars_avail) {
    9796        int rt_val=0;
    9897//      end tag
     
    105104
    106105      if (does_match(this->tag_stack[depth],&this->srcbuf[pos],lgth) && ((this->srcbuf[pos+lgth] == '>') ||(this->srcbuf[pos+lgth] <= ' '))) rt_val=0;
    107       else if (pos + lgth >= BUF_SIZE + OVER_SIZE) {
     106          else if (pos + lgth >= BUF_SIZE + LA_SIZE) {
    108107        this->state = InEndTag;
    109108        this-> inTagPos = BUF_SIZE - pos;
     
    185184}
    186185
    187 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    188 int TagMatcher<BUF_SIZE, OVER_SIZE>::StreamScan(int chars_avail) {
     186template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     187int TagMatcher<BUF_SIZE, LA_SIZE>::StreamScan(int chars_avail) {
    189188
    190189        int blk;
     
    220219}
    221220
    222 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    223 void TagMatcher<BUF_SIZE, OVER_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
     221template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     222void TagMatcher<BUF_SIZE, LA_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
    224223#ifdef DUMP
    225224print_register<BitBlock>("tagMark", tagMark);
     
    283282}
    284283
    285 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    286 TagMatcher<BUF_SIZE, OVER_SIZE>::TagMatcher(){
     284template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     285TagMatcher<BUF_SIZE, LA_SIZE>::TagMatcher(){
    287286  this->stream_index = 0;
    288287  this->depth = 0;
     
    294293}
    295294
    296 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    297 TagMatcher<BUF_SIZE, OVER_SIZE>::~TagMatcher(){
    298 
    299 }
    300 
    301 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    302 void TagMatcher<BUF_SIZE, OVER_SIZE>::setSrc(char * src){
     295template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     296TagMatcher<BUF_SIZE, LA_SIZE>::~TagMatcher(){
     297
     298}
     299
     300template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     301void TagMatcher<BUF_SIZE, LA_SIZE>::setSrc(char * src){
    303302  this->srcbuf = src;
    304303}
    305304
    306 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    307 void TagMatcher<BUF_SIZE, OVER_SIZE>::Advance_buffer(){
     305template <uint64_t BUF_SIZE, uint64_t LA_SIZE>
     306void TagMatcher<BUF_SIZE, LA_SIZE>::Advance_buffer(){
    308307  this->buf_base += BUF_SIZE;
    309308  this->stream_index=0;
Note: See TracChangeset for help on using the changeset viewer.