Changeset 2155 for proto


Ignore:
Timestamp:
May 24, 2012, 6:51:24 PM (7 years ago)
Author:
ksherdy
Message:

Reverted to 2142.

Location:
proto/parabix2
Files:
1 deleted
3 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/pablo_template.cpp

    r2154 r2155  
    1313#include "../lib/bitblock_iterator.hpp"
    1414#include "../lib/s2p.hpp"
    15 #include "../lib/perflib/perfsec.h"
     15
     16#define SEGMENT_BLOCKS 12
     17#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     18#define OVERLAP_BUFSIZE (sizeof(BitBlock))
    1619
    1720#include "xmldecl.h"
    1821#include "namechars.h"
     22#include "../lib/perflib/perfsec.h"
     23
    1924#include "TagMatcher.hpp"
    2025#include "LineColTracker.hpp"
     
    2429
    2530#ifdef BUFFER_PROFILING
    26     BOM_Table * parser_timer;
     31        BOM_Table * parser_timer;
     32
    2733#elif CODE_CLOCKER
    28     //#define NUM_EVENTS 1
    29     //int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
    30     //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
    31     #define NUM_EVENTS 2
    32     int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
    33     int cal_size = 20;
    34     CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
     34        #define NUM_EVENTS 1
     35        int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
     36        //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
     37        //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
     38        int cal_size = 20;
     39        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
    3540#else
    36     void * parser_timer;
     41        void * parser_timer;
    3742#endif
    3843
     44int block_base=0;
     45int buffer_base=0;
     46char * source;
     47
     48LineColTracker tracker;
     49TagMatcher matcher;
    3950ErrorTracker error_tracker;
    4051BitBlock EOF_mask = simd<1>::constant<1>();
    4152
    42 //////////////////////////////////////////////////////////////////////////////////////////
    43 // Buffer Management
    44 //////////////////////////////////////////////////////////////////////////////////////////
    45 #include "../lib/buffer.hpp"
    46 
    47 //////////////////////////////////////////////////////////////////////////////////////////
    48 // @ global depends on 'error_tracker' and 'EOF_mask' definitions.
    49 //////////////////////////////////////////////////////////////////////////////////////////
     53static inline int NameStrt_check(int pos);
     54static inline int Name_check(int pos);
     55static inline int PIName_check(int pos);
     56static inline int CD_check(int pos);
     57static inline int GenRef_check(int pos);
     58static inline int HexRef_check(int pos);
     59static inline int DecRef_check(int pos);
     60static inline int AttRef_check(int pos);
     61
    5062@global
    5163
    52 //////////////////////////////////////////////////////////////////////////////////////////
    53 // Headers that depend @ global stream struct types.
    54 //////////////////////////////////////////////////////////////////////////////////////////
    55 #include "../lib/transpose.hpp"
    56 #include "post_process.hpp"
    57 
    58 static void do_process(FILE *infile, FILE *outfile);
     64static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     65static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     66static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail);
     67
     68void do_process(FILE *infile, FILE *outfile);
     69
     70static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
     71static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    5972
    6073int main(int argc, char * argv[]) {
     
    87100        }
    88101
    89         PERF_SEC_BIND(1);
     102//      PERF_SEC_BIND(1);
    90103
    91104        PERF_SEC_INIT(parser_timer);
     
    103116}
    104117
     118/* s2p Definitions */
     119static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     120  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     121        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     122}
     123
     124static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     125  s2p_do_block(U8, basis_bits);
     126  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     127  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     128  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     129  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     130  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     131  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     132  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     133  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     134}
     135
     136
     137static inline int NameStrt_check(int pos) {
     138        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
     139              return XMLTestSuiteError::NAME_START;
     140        }
     141        return 0;
     142}
     143
     144static inline int Name_check(int pos) {
     145        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
     146                  return XMLTestSuiteError::NAME;
     147        }
     148        return 0;
     149}
     150
     151static inline int PIName_check(int pos, int file_pos) {
     152        if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
     153              // "<?xml" legal at start of file.
     154              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     155                  return XMLTestSuiteError::XMLPINAME;
     156              }
     157        }
     158        return 0;
     159}
     160
     161static inline int CD_check(int pos) {
     162        if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
     163                  return XMLTestSuiteError::CDATA;
     164        }
     165        return 0;
     166}
     167
     168static inline int GenRef_check(int pos) {
     169        unsigned char* s = (unsigned char*)&source[pos];
     170        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
     171              return XMLTestSuiteError::UNDEFREF;
     172        }
     173        return 0;
     174}
     175
     176static inline int HexRef_check(int pos) {
     177        unsigned char* s = (unsigned char*)&source[pos];
     178        int ch_val = 0;
     179        while(at_HexDigit<ASCII>(s)){
     180          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
     181          if (ch_val> 0x10FFFF ){
     182                return XMLTestSuiteError::CHARREF;
     183          }
     184          s++;
     185        }
     186        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
     187          return XMLTestSuiteError::CHARREF;
     188        }
     189        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
     190          return XMLTestSuiteError::XML10CHARREF;
     191        }
     192        return 0;
     193}
     194
     195static inline int DecRef_check(int pos) {
     196        unsigned char* s = (unsigned char*)&source[pos];
     197        int ch_val = 0;
     198        while(at_HexDigit<ASCII>(s)){
     199          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
     200          if (ch_val> 0x10FFFF ){
     201                        return XMLTestSuiteError::CHARREF;
     202          }
     203          s++;
     204        }
     205        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
     206                  return XMLTestSuiteError::CHARREF;
     207        }
     208        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
     209                  return XMLTestSuiteError::XML10CHARREF;
     210        }
     211        return 0;
     212}
     213
     214static inline int AttRef_check(int pos) {
     215        unsigned char* s = (unsigned char*)&source[pos];
     216        int ch_val = 0;
     217        if(s[0]=='#'){
     218          s++;
     219          if(s[0]=='x' || s[0]=='X'){
     220            s++;
     221            while(at_HexDigit<ASCII>(s)){
     222              ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
     223              s++;
     224            }
     225          }
     226          else{
     227            while(at_HexDigit<ASCII>(s)){
     228              ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
     229              s++;
     230            }
     231          }
     232          if (ch_val==60){
     233            return XMLTestSuiteError::ATTREF;
     234          }
     235        }
     236        else if(at_Ref_lt<ASCII>(s)){
     237          return XMLTestSuiteError::ATTREF;
     238        }
     239        return 0;
     240}
     241
     242static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
     243
     244        int pos, block_pos;
     245        BitBlockForwardIterator end;
     246        while(start != end) {
     247
     248                block_pos = block_base + *start;
     249                int rv = is_valid(block_pos);
     250
     251                if (rv) {
     252                        int error_line, error_column;
     253                        tracker.get_Line_and_Column(block_pos, error_line, error_column);
     254                        ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     255                        exit(-1);
     256                }
     257                start++;
     258        }
     259}
     260
     261static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
     262
     263        int pos, block_pos, file_pos;
     264        BitBlockForwardIterator end;
     265        while(start != end) {
     266
     267                block_pos = block_base + *start;
     268                file_pos = block_pos+buffer_base;
     269
     270
     271                int rv = is_valid(block_pos, file_pos);
     272
     273                if (rv) {
     274                        int error_line, error_column;
     275                        tracker.get_Line_and_Column(block_pos, error_line, error_column);
     276                        ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     277                        exit(-1);
     278                }
     279                start++;
     280        }
     281}
     282
     283static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail){
     284BitBlockForwardIterator iter;
     285
     286tracker.StoreNewlines(lex.LF);
     287
     288if (bitblock::any(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     289        iter.init(&check_streams.non_ascii_name_starts);
     290        validate_block(iter, block_base, NameStrt_check);
     291        iter.init(&check_streams.non_ascii_names);
     292        validate_block(iter, block_base, Name_check);
     293}
     294if (bitblock::any(ctCDPI_Callouts.PI_name_starts)){
     295        iter.init(&(ctCDPI_Callouts.PI_name_starts));
     296        validate_block(iter, block_base, buffer_base, PIName_check);
     297}
     298if (bitblock::any(ctCDPI_Callouts.CD_starts)){
     299        iter.init(&ctCDPI_Callouts.CD_starts);
     300        validate_block(iter, block_base, CD_check);
     301}
     302if(bitblock::any(ref_Callouts.GenRef_starts)){
     303        iter.init(&ref_Callouts.GenRef_starts);
     304        validate_block(iter, block_base, GenRef_check);
     305}
     306if(bitblock::any(ref_Callouts.DecRef_starts)){
     307        iter.init(&ref_Callouts.DecRef_starts);
     308        validate_block(iter, block_base, DecRef_check);
     309}
     310if(bitblock::any(ref_Callouts.HexRef_starts)){
     311        iter.init(&ref_Callouts.HexRef_starts);
     312        validate_block(iter, block_base, HexRef_check);
     313}
     314if(bitblock::any(check_streams.att_refs)){
     315        iter.init(&check_streams.att_refs);
     316        validate_block(iter, block_base, AttRef_check);
     317}
     318
     319if(error_tracker.Has_Noted_Error()){
     320        int error_line, error_column;
     321        tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     322        ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     323        exit(-1);
     324}
     325
     326matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     327tracker.AdvanceBlock();
     328
     329}
     330
    105331void do_process(FILE *infile, FILE *outfile) {
    106332
    107     @decl
    108 
    109     LineColTracker tracker;
    110     TagMatcher<SEGMENT_SIZE,PADDING_SIZE> matcher;
    111 
    112     uint8_t * src_buf;
    113     int block_base=0;
    114     int buffer_base=0;
    115     int buffer_pos = 0;
    116     int block_pos = 0;
    117     int chars_avail = 0;
    118     int check_pos = 0;
    119     int chars_read = 0;
    120 
    121     //////////////////////////////////////////////////////////////////////////////////////////
    122     // Buffer Management
    123     //////////////////////////////////////////////////////////////////////////////////////////
    124     //BitBlock buf[(BUFFER_SIZE)/sizeof(BitBlock)];
    125     void * temp;
    126     ALLOC_STATIC_ALIGNED_BYTE_BUFFER(temp);
    127     src_buf = (uint8_t *)temp;
    128 
    129     buffer_base = buffer_pos;
    130     chars_read = fread((void *)src_buf, 1, SEGMENT_SIZE, infile);
    131     chars_avail = chars_read;
    132     if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    133 
    134     //////////////////////////////////////////////////////////////////////////////////////////
    135     // XML Validation / Content Model
    136     //////////////////////////////////////////////////////////////////////////////////////////
    137     if(chars_read<4){
     333@decl
     334
     335  int buf_pos = 0;
     336  int block_pos = 0;
     337  int chars_avail = 0;
     338  int check_pos = 0;
     339  int chars_read = 0;
     340  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(BitBlock)];
     341
     342  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
     343  buffer_base = buf_pos;
     344  source = srcbuf;
     345
     346  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
     347  chars_avail = chars_read;
     348  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
     349
     350  matcher.setSrc(srcbuf);
     351
     352  if(chars_read<4){
    138353    fprintf(stderr,"File is too short. Not well formed.\n");
    139354    exit(-1);
     355  }
     356
     357  Entity_Info * e = new Entity_Info;
     358  e->AnalyzeSignature((unsigned char *)srcbuf);
     359
     360  if (e->code_unit_base == ASCII) {
     361
     362    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
     363
     364    decl_parser.ReadXMLInfo(*e);
     365
     366    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
     367        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
     368        exit(-1);
    140369    }
    141 
    142     Entity_Info * e = new Entity_Info;
    143     e->AnalyzeSignature((unsigned char *)src_buf);
    144 
    145     if (e->code_unit_base == ASCII) {
    146 
    147     XML_Decl_Parser<ASCII> decl_parser((unsigned char *)src_buf);
    148 
    149     decl_parser.ReadXMLInfo(*e);
    150 
    151     if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
    152         fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
    153         exit(-1);
    154     }
    155     }
    156     else {
    157         fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
    158         exit(-1);
    159     }
    160 
    161     if (e->content_start != 0) {
    162     memmove(&src_buf[0], &src_buf[e->content_start], chars_read - e->content_start);
    163     buffer_pos = e->content_start;
    164     if (chars_avail == SEGMENT_SIZE) {
    165         chars_read = chars_read - e->content_start + fread(&src_buf[chars_read-e->content_start], 1, e->content_start, infile);
    166         chars_avail = chars_read;
    167         if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    168     }
    169     else {
    170       chars_read -=e->content_start;
    171       chars_avail -=e->content_start;
    172     }
    173     }
    174 
    175     @stream_stmts
    176 
    177 
    178     //////////////////////////////////////////////////////////////////////////////////////////
    179     // Full Segments
    180     //////////////////////////////////////////////////////////////////////////////////////////
    181     matcher.setSrc((char *)src_buf);
    182     while (chars_avail == SEGMENT_SIZE) {
     370  }
     371  else {
     372    fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
     373        exit(-1);
     374  }
     375
     376  if (e->content_start != 0) {
     377        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     378        buf_pos = e->content_start;
     379        if (chars_avail == BUFFER_SIZE) {
     380                chars_read = chars_read - e->content_start +
     381                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
     382                chars_avail = chars_read;
     383                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
     384        }
     385        else {
     386          chars_read -=e->content_start;
     387          chars_avail -=e->content_start;
     388        }
     389  }
     390
     391@stream_stmts
     392
     393/* Full Buffers */
     394
     395    while (chars_avail == BUFFER_SIZE) {
    183396      PERF_SEC_START(parser_timer);
    184397      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    185398          block_base = blk*BLOCK_SIZE;
    186           s2p_do_block((BytePack *) &src_buf[block_base], basis_bits);
     399          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    187400          @block_stmts
    188           tracker.StoreNewlines(lex.LF);
    189           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
    190           matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    191           tracker.AdvanceBlock();
     401          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    192402      }
    193403      matcher.StreamScan(chars_avail);
     
    196406
    197407      int bytes_left = chars_read - chars_avail;
    198       memmove(src_buf, &src_buf[SEGMENT_SIZE], bytes_left);
    199       chars_read = fread(&src_buf[bytes_left], 1, SEGMENT_SIZE - bytes_left, infile) + bytes_left;
     408      memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
     409      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
    200410      chars_avail = chars_read;
    201       if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
    202       buffer_pos += chars_avail;
    203       buffer_base = buffer_pos;
     411      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
     412      buf_pos += chars_avail;
     413      buffer_base = buf_pos;
    204414    }
    205 
    206     //////////////////////////////////////////////////////////////////////////////////////////
    207     // Final Partial Segment
    208     //////////////////////////////////////////////////////////////////////////////////////////
     415/* Final Partial Buffer */
    209416    PERF_SEC_START(parser_timer);
    210417
    211418    block_pos = 0;
    212419    int remaining = chars_avail;
    213 
    214     /* Full Blocks */
     420/* Full Blocks */
    215421    while (remaining >= BLOCK_SIZE) {
    216           block_base = block_pos;
    217           s2p_do_block((BytePack *) &src_buf[block_pos], basis_bits);
     422          block_base = block_pos;
     423          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    218424          @block_stmts
    219           tracker.StoreNewlines(lex.LF);
    220           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
    221           matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    222           tracker.AdvanceBlock();
    223           block_pos += BLOCK_SIZE;
     425          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
     426          block_pos += BLOCK_SIZE;
    224427          remaining -= BLOCK_SIZE;
    225428    }
    226429    block_base = block_pos;
    227 
    228     /* Partial Block or Any Carry */
    229430    if (remaining > 0 || @any_carry) {
    230431          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    231           s2p_do_final_block((BytePack *) &src_buf[block_pos], basis_bits, EOF_mask);
     432          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    232433          @final_block_stmts
    233           tracker.StoreNewlines(lex.LF);
    234           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)src_buf, buffer_base, block_base, chars_avail, tracker);
    235           matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    236           tracker.AdvanceBlock();
    237 
     434          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    238435    }
    239 
    240     buffer_pos += chars_avail;
    241     buffer_base = buffer_pos;
     436    buf_pos += chars_avail;
     437    buffer_base = buf_pos;
    242438
    243439    matcher.StreamScan(chars_avail);
    244440    matcher.Advance_buffer();
     441
    245442
    246443    PERF_SEC_END(parser_timer, chars_avail);
  • proto/parabix2/src/Makefile

    r2144 r2155  
    1313ifeq ($(ARCH),32)
    1414    AFLAGS=-march=pentium4
    15     PAPI=-DCODE_CLOCKER -I$(PAPI_DIR)/include -L$(PAPI_DIR)/lib -lpapi
     15    CODE_CLOCKER=-DCODE_CLOCKER -I$(PAPI_DIR)/include -L$(PAPI_DIR)/lib -lpapi
    1616else
    1717    AFLAGS=-march=nocona -m64
    18     PAPI=-DCODE_CLOCKER -I$(PAPI_DIR)/include -L$(PAPI_DIR)/lib -lpapi
     18    CODE_CLOCKER=-DCODE_CLOCKER -I$(PAPI_DIR)/include -L$(PAPI_DIR)/lib -lpapi
    1919endif
    2020
     
    2727threads:        $(SRCFILE)
    2828        g++ -O3 -msse2 -o $(OUTFILE) $(SRCFILE) $(AFLAGS) -lpthread -DBUFFER_PROFILING
    29 
    30 papi_profiling: $(SRCFILE)
    31         $(CC) -o $(OUTFILE) $(SRCFILE) $(AFLAGS) $(PAPI)
    3229
    3330buffer_profiling: $(SRCFILE)
  • proto/parabix2/src/TagMatcher.hpp

    r2147 r2155  
    44#include "../lib/bitblock.hpp"
    55
     6#define MAX_DEPTH 100
    67#include <algorithm>
    78#include <iostream>
     
    910using namespace std;
    1011
     12
    1113#define MAX_DEPTH 100
    1214#define MAX_ATTS 100
     
    1719};
    1820
    19 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    2021class TagMatcher {
    21 
    22 public:
     22  public:
     23  BitBlock tagMarks[BUFFER_SIZE/BLOCK_SIZE];
     24  BitBlock miscMarks[BUFFER_SIZE/BLOCK_SIZE];
     25  char tags_buf[BUFFER_SIZE];
     26  int tags_buf_cur;
     27  int stream_index;
     28  char * srcbuf;
     29  int depth;
     30  int inTagPos;
     31  int finalStartPos;
     32  char* tag_stack[MAX_DEPTH];
     33  int tag_lgth_stack[MAX_DEPTH];
     34  BitBlock NameFollows[BUFFER_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
     35  int buf_base;
     36  enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
     37  enum TagMatchMode {StartOfFile, InFile} mode;
     38  struct attribute Attr[MAX_ATTS];
     39  struct attribute InAtt;
     40  int att_index;
     41  int InFinalEndTag;
     42
    2343  TagMatcher();
    2444  ~TagMatcher();
     
    3050  int does_match(char * s1, char * s2, int lgth);
    3151  int lookup_or_insert(char*s, int lgth);
    32 
    33     int depth;
    34 
    35 private:
    36   BitBlock tagMarks[BUF_SIZE/BLOCK_SIZE];
    37   BitBlock miscMarks[BUF_SIZE/BLOCK_SIZE];
    38   char tags_buf[BUF_SIZE];
    39   int tags_buf_cur;
    40   int stream_index;
    41   char * srcbuf;
    42 
    43   int inTagPos;
    44   int finalStartPos;
    45   char* tag_stack[MAX_DEPTH];
    46   int tag_lgth_stack[MAX_DEPTH];
    47   BitBlock NameFollows[BUF_SIZE/BLOCK_SIZE+1]; // 1 extra block for sentinel
    48   int buf_base;
    49   enum TagMatchState {InStartTag, InEndTag, InAttName, Clear} state;
    50   enum TagMatchMode {StartOfFile, InFile} mode;
    51   struct attribute Attr[MAX_ATTS];
    52   struct attribute InAtt;
    53   int att_index;
    54   int InFinalEndTag;
    55 
    5652};
    5753
    58 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    59 int TagMatcher<BUF_SIZE, OVER_SIZE>::does_match(char * s1, char * s2, int lgth){
     54int TagMatcher::lookup_or_insert(char* s, int lgth){
     55  for(int i=0; i< att_index; i++)
     56    if(lgth == Attr[i].lgth &&  does_match(s,Attr[i].start,lgth))
     57      return 1;
     58
     59  Attr[att_index].start = s;
     60  Attr[att_index].lgth = lgth;
     61  att_index++;
     62  return 0;
     63}
     64
     65int TagMatcher::does_match(char * s1, char * s2, int lgth){
    6066    int matchlen = 0;
    6167    int i=0;
     
    6369      /* full 16 byte match */
    6470
    65       if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]),
    66                 bitblock::load_unaligned((BitBlock*)&s2[i])))) {
    67         return 0;
     71      if (bitblock::any(simd_xor(bitblock::load_unaligned((BitBlock*)&s1[i]), 
     72                                bitblock::load_unaligned((BitBlock*)&s2[i])))) {
     73                return 0;
    6874      }
    6975      else {
    70         lgth -= sizeof(BitBlock);
    71         i +=sizeof(BitBlock);
    72       }
    73     }
    74 
    75     scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
     76                lgth -= sizeof(BitBlock);
     77                i +=sizeof(BitBlock);
     78      }
     79    }
     80
     81        scanword_t temp = ~hsimd<8>::signmask(simd<8>::eq(bitblock::load_unaligned((BitBlock*)&s1[i]),
    7682                                                  bitblock::load_unaligned((BitBlock*)&s2[i])));
    7783
    78     return lgth <= scan_forward_zeroes(temp);
    79 }
    80 
    81 
    82 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    83 int TagMatcher<BUF_SIZE, OVER_SIZE>::lookup_or_insert(char* s, int lgth){
    84   for(int i=0; i< this->att_index; i++)
    85     if(lgth == this->Attr[i].lgth &&  this->does_match(s,this->Attr[i].start,lgth))
    86       return 1;
    87 
    88   this->Attr[att_index].start = s;
    89   this->Attr[att_index].lgth = lgth;
    90   this->att_index++;
    91   return 0;
    92 }
    93 
    94 
    95 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    96 int TagMatcher<BUF_SIZE, OVER_SIZE>:: tag_match(int pos, int chars_avail) {
     84        return lgth <= scan_forward_zeroes(temp);
     85}
     86
     87
     88int TagMatcher:: tag_match(int pos, int chars_avail) {
    9789        int rt_val=0;
    9890//      end tag
    99     if(this->srcbuf[pos]=='/' ){
     91        if(srcbuf[pos]=='/' ){
    10092          pos++;
    101       this->depth--;
    102       if (this->depth<0)
     93          depth--;
     94          if (depth<0)
    10395            return pos;
    104       int lgth = this->tag_lgth_stack[depth];
    105 
    106       if (does_match(this->tag_stack[depth],&this->srcbuf[pos],lgth) && ((this->srcbuf[pos+lgth] == '>') ||(this->srcbuf[pos+lgth] <= ' '))) rt_val=0;
    107       else if (pos + lgth >= BUF_SIZE + OVER_SIZE) {
    108         this->state = InEndTag;
    109         this-> inTagPos = BUF_SIZE - pos;
     96          int lgth = tag_lgth_stack[depth];
     97
     98          if (does_match(tag_stack[depth],&srcbuf[pos],lgth) && ((srcbuf[pos+lgth] == '>') ||(srcbuf[pos+lgth] <= ' '))) rt_val=0;
     99          else if (pos + lgth >= BUFFER_SIZE + OVERLAP_BUFSIZE) {
     100            state = InEndTag;
     101            inTagPos = BUFFER_SIZE - pos;
    110102            rt_val=0;
    111103          }
     
    117109          }
    118110
    119       if (this->depth == 0){
    120         while(this->srcbuf[pos]!='>'){
     111          if (depth == 0){
     112            while(srcbuf[pos]!='>'){
    121113              pos++;
    122114              if(pos>=chars_avail){
    123         this->InFinalEndTag = 1;
     115                InFinalEndTag = 1;
    124116                return 0;
    125117              }
    126118            }
    127         pos = bitstream_scan(this->miscMarks,pos+1);
    128             if(pos!=chars_avail){
    129           fprintf(stderr,"illegal content after root element at position = %i\n",this->buf_base+pos);
    130               exit(-1);
    131             }
    132           }
    133           return rt_val;
    134         }
    135 //      empty tag
    136     else if(this->srcbuf[pos]=='>'){
    137       this->depth--;
    138       if (this->depth == 0){
    139         while(this->srcbuf[pos]!='>')
    140               pos++;
    141         pos = bitstream_scan(this->miscMarks,pos+1);
    142 
     119            pos = bitstream_scan(miscMarks,pos+1);
    143120            if(pos!=chars_avail){
    144121              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
     
    146123            }
    147124          }
     125          return rt_val;
     126        }
     127//      empty tag
     128        else if(srcbuf[pos]=='>'){
     129          depth--;
     130          if (depth == 0){
     131            while(srcbuf[pos]!='>')
     132              pos++;
     133            pos = bitstream_scan(miscMarks,pos+1);
     134
     135            if(pos!=chars_avail){
     136              fprintf(stderr,"illegal content after root element at position = %i\n",buf_base+pos);
     137              exit(-1);
     138            }
     139          }
    148140        }
    149141//      start tag
    150     else if(this->srcbuf[pos-1]=='<'){
    151       this->att_index = 0;
    152       if(this->depth<MAX_DEPTH){
    153         int end_pos = bitstream_scan(this->NameFollows,pos);
    154         this->tag_lgth_stack[this->depth] = end_pos-pos;
    155         this->tag_stack[depth] = &this->srcbuf[pos];
    156         if(end_pos<BUF_SIZE){
    157          this->depth++;
     142        else if(srcbuf[pos-1]=='<'){
     143          att_index = 0;
     144          if(depth<MAX_DEPTH){
     145            int end_pos = bitstream_scan(NameFollows,pos);
     146            tag_lgth_stack[depth] = end_pos-pos;
     147            tag_stack[depth] = &srcbuf[pos];
     148            if(end_pos<BUFFER_SIZE){
     149              depth++;
    158150            }
    159151            else{
    160           this->state = InStartTag;
    161           this->finalStartPos = pos;
     152              state = InStartTag;
     153              finalStartPos = pos;
    162154            }
    163155          }
    164156          else{
    165         fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",this->buf_base+pos, this->depth);
     157            fprintf(stderr,"Max nesting depth exceeded at position =%i. depth = %i\n",buf_base+pos, depth);
    166158            exit(-1);
    167159          }
     
    169161//      attribute
    170162        else{
    171       int end_pos = bitstream_scan(this->NameFollows,pos);
    172       if(end_pos<BUF_SIZE){
    173         if(lookup_or_insert(&this->srcbuf[pos], end_pos-pos)){
    174           fprintf(stderr,"Attribute name is not unique at position =%i.\n",this->buf_base+pos);
     163          int end_pos = bitstream_scan(NameFollows,pos);
     164          if(end_pos<BUFFER_SIZE){
     165            if(lookup_or_insert(&srcbuf[pos], end_pos-pos)){
     166              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base+pos);
    175167              exit(-1);
    176168            }
    177169          }
    178170          else{
    179         this->state = InAttName;
    180         this->InAtt.start = &this->srcbuf[pos];
    181         this->InAtt.lgth = BUF_SIZE-pos;
     171            state = InAttName;
     172            InAtt.start = &srcbuf[pos];
     173            InAtt.lgth = BUFFER_SIZE-pos;
    182174          }
    183175        }
     
    185177}
    186178
    187 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    188 int TagMatcher<BUF_SIZE, OVER_SIZE>::StreamScan(int chars_avail) {
     179
     180int TagMatcher::StreamScan(int chars_avail) {
    189181
    190182        int blk;
     
    192184        int block_pos = 0;
    193185
    194     if(this->mode == StartOfFile){
     186        if(mode == StartOfFile){
    195187          int pos = bitstream_scan(miscMarks,0);
    196188          if (pos==chars_avail){
    197         fprintf(stderr,"no element at position =%i.\n",this->buf_base+pos);
     189            fprintf(stderr,"no element at position =%i.\n",buf_base+pos);
    198190            exit(-1);
    199191          }
    200       if(this->srcbuf[pos-1]!='<'|| this->srcbuf[pos]=='!'|| this->srcbuf[pos]=='/'){
     192          if(srcbuf[pos-1]!='<'|| srcbuf[pos]=='!'||srcbuf[pos]=='/'){
    201193#ifdef DUMP
    202194print_register<BitBlock>("srcbuf", bitblock::load_unaligned((BitBlock *) srcbuf));
     
    205197            exit(-1);
    206198          }
    207       this->mode = InFile;
     199          mode = InFile;
    208200        }
    209201        for (blk = 0; blk < blk_counts; blk++) {
    210         scanword_t s = ((scanword_t*)this->tagMarks)[blk];
     202                scanword_t s = ((scanword_t*)tagMarks)[blk];
    211203                while(s) {
    212204                        int code = tag_match(scan_forward_zeroes(s) + block_pos, chars_avail);
     
    220212}
    221213
    222 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    223 void TagMatcher<BUF_SIZE, OVER_SIZE>::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
     214void TagMatcher::store_streams(BitBlock tagMark, BitBlock NameFollow, BitBlock miscMark, int chars_avail){
    224215#ifdef DUMP
    225216print_register<BitBlock>("tagMark", tagMark);
     
    229220printf("stream_index = %i\n", stream_index);
    230221#endif
    231   this->tagMarks[this->stream_index] = tagMark;
    232   this->miscMarks[this->stream_index] = simd_not(miscMark);
    233   this->NameFollows[this->stream_index] = NameFollow;
    234   this->stream_index++;
    235   if(this->stream_index==1){
    236 
    237     if (this->InFinalEndTag == 1){
     222  tagMarks[stream_index] = tagMark;
     223  miscMarks[stream_index] = simd_not(miscMark);
     224  NameFollows[stream_index] = NameFollow;
     225  stream_index++;
     226  if(stream_index==1){
     227
     228    if (InFinalEndTag == 1){
    238229      int pos = -1;
    239       while(this->srcbuf[pos]!='>'){
     230      while(srcbuf[pos]!='>'){
    240231        pos++;
    241232        if(pos>=chars_avail){
    242       this->InFinalEndTag = 1;
     233          InFinalEndTag = 1;
    243234          return;
    244235        }
    245236      }
    246       pos = bitstream_scan(this->miscMarks,pos+1);
     237      pos = bitstream_scan(miscMarks,pos+1);
    247238#ifdef DUMP
    248239print_register<BitBlock>("miscMarks[0]", miscMarks[0]);
     
    255246    }
    256247
    257     if(this->state == InStartTag) {
    258       this->state = this->Clear;
    259       int remain_lgth = bitstream_scan(this->NameFollows,0);
    260       memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
    261       this->tag_lgth_stack[this->depth] += remain_lgth;
    262       this->depth++;
    263     }
    264     else if (this->state == InEndTag) {
    265       this->state = Clear;
    266       int lgth = this->tag_lgth_stack[this->depth];
    267       if (does_match(this->tag_stack[this->depth]+this->inTagPos,this->srcbuf,lgth-this->inTagPos) && ((this->srcbuf[lgth-this->inTagPos] == '>') ||(this->srcbuf[lgth-this->inTagPos] <= ' '))) return ;
     248    if(state == InStartTag) {
     249      state = Clear;
     250      int remain_lgth = bitstream_scan(NameFollows,0);
     251      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
     252      tag_lgth_stack[depth] += remain_lgth;
     253      depth++;
     254    }
     255    else if (state == InEndTag) {
     256      state = Clear;
     257      int lgth = tag_lgth_stack[depth];
     258      if (does_match(tag_stack[depth]+inTagPos,srcbuf,lgth-inTagPos) && ((srcbuf[lgth-inTagPos] == '>') ||(srcbuf[lgth-inTagPos] <= ' '))) return ;
    268259      else {
    269260          fprintf(stderr,"tag name mismatch at position = %i\n",buf_base);
     
    271262      }
    272263    }
    273     else if (this->state == InAttName) {
    274       this->state = Clear;
    275       int remain_lgth = bitstream_scan(this->NameFollows,0);
    276       memcpy(&this->tags_buf[this->tags_buf_cur],this->srcbuf,remain_lgth);
    277       if(lookup_or_insert(this->InAtt.start, this->InAtt.lgth+remain_lgth)){
     264    else if (state == InAttName) {
     265      state = Clear;
     266      int remain_lgth = bitstream_scan(NameFollows,0);
     267      memcpy(&tags_buf[tags_buf_cur],srcbuf,remain_lgth);
     268      if(lookup_or_insert(InAtt.start, InAtt.lgth+remain_lgth)){
    278269              fprintf(stderr,"Attribute name is not unique at position =%i.\n",buf_base);
    279270              exit(-1);
     
    283274}
    284275
    285 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    286 TagMatcher<BUF_SIZE, OVER_SIZE>::TagMatcher(){
    287   this->stream_index = 0;
    288   this->depth = 0;
    289   this->buf_base = 0;
    290   this->state = Clear;
    291   this->mode = StartOfFile;
    292   this->InFinalEndTag = 0;
    293   this->NameFollows[BUF_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
    294 }
    295 
    296 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    297 TagMatcher<BUF_SIZE, OVER_SIZE>::~TagMatcher(){
    298 
    299 }
    300 
    301 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    302 void TagMatcher<BUF_SIZE, OVER_SIZE>::setSrc(char * src){
    303   this->srcbuf = src;
    304 }
    305 
    306 template <uint64_t BUF_SIZE, uint64_t OVER_SIZE>
    307 void TagMatcher<BUF_SIZE, OVER_SIZE>::Advance_buffer(){
    308   this->buf_base += BUF_SIZE;
    309   this->stream_index=0;
    310   this->tags_buf_cur = 0;
    311   this->att_index = 0;
    312   for(int i=0; i< this->depth; i++){
    313     if(&this->tags_buf[this->tags_buf_cur]!=this->tag_stack[i])
    314       memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[i],this->tag_lgth_stack[i]);
    315     this->tag_stack[i] = &this->tags_buf[tags_buf_cur];
    316     this->tags_buf_cur += this->tag_lgth_stack[i];
    317   }
    318   if(this->state == InStartTag) {
    319       memcpy(&this->tags_buf[this->tags_buf_cur],&this->srcbuf[this->finalStartPos],this->tag_lgth_stack[this->depth]);
    320       this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
    321       this->tags_buf_cur += this->tag_lgth_stack[this->depth];
    322   }
    323   else if(this->state == InEndTag) {
    324      memcpy(&this->tags_buf[this->tags_buf_cur],this->tag_stack[this->depth],this->tag_lgth_stack[this->depth]);
    325     this->tag_stack[depth] = &this->tags_buf[this->tags_buf_cur];
    326     this->tags_buf_cur += this->tag_lgth_stack[this->depth];
    327   }
    328   else if(this->state == InAttName) {
    329       memcpy(&this->tags_buf[this->tags_buf_cur],this->InAtt.start,this->InAtt.lgth);
    330       this->InAtt.start = &this->tags_buf[tags_buf_cur];
    331       this->tags_buf_cur += this->InAtt.lgth;
    332   }
    333   this->srcbuf[-1] = this->srcbuf[BUF_SIZE-1];
    334 }
    335 
     276TagMatcher::TagMatcher(){
     277  stream_index = 0;
     278  depth = 0;
     279  buf_base = 0;
     280  state = Clear;
     281  mode = StartOfFile;
     282  InFinalEndTag = 0;
     283  NameFollows[BUFFER_SIZE/BLOCK_SIZE]=simd<1>::constant<1>();// TODO - verify simd_const_1(1);  //sentinel
     284}
     285
     286
     287
     288TagMatcher::~TagMatcher(){
     289
     290}
     291
     292void TagMatcher::setSrc(char * src){
     293  srcbuf = src;
     294}
     295
     296void TagMatcher::Advance_buffer(){
     297  buf_base += BUFFER_SIZE;
     298  stream_index=0;
     299  tags_buf_cur = 0;
     300  att_index = 0;
     301  for(int i=0; i< depth; i++){
     302    if(&tags_buf[tags_buf_cur]!=tag_stack[i])
     303      memcpy(&tags_buf[tags_buf_cur],tag_stack[i],tag_lgth_stack[i]);
     304    tag_stack[i] = &tags_buf[tags_buf_cur];
     305    tags_buf_cur += tag_lgth_stack[i];
     306  }
     307  if(state == InStartTag) {
     308      memcpy(&tags_buf[tags_buf_cur],&srcbuf[finalStartPos],tag_lgth_stack[depth]);
     309      tag_stack[depth] = &tags_buf[tags_buf_cur];
     310      tags_buf_cur += tag_lgth_stack[depth];
     311  }
     312  else if(state == InEndTag) {
     313     memcpy(&tags_buf[tags_buf_cur],tag_stack[depth],tag_lgth_stack[depth]);
     314    tag_stack[depth] = &tags_buf[tags_buf_cur];
     315    tags_buf_cur += tag_lgth_stack[depth];
     316  }
     317  else if(state == InAttName) {
     318      memcpy(&tags_buf[tags_buf_cur],InAtt.start,InAtt.lgth);
     319      InAtt.start = &tags_buf[tags_buf_cur];
     320      tags_buf_cur += InAtt.lgth;
     321  }
     322  srcbuf[-1] = srcbuf[BUFFER_SIZE-1];
     323}
    336324
    337325#endif /* TAGMATCHER_HPP_ */
Note: See TracChangeset for help on using the changeset viewer.